diff --git a/base/decoder_first.mlmodelc/analytics/coremldata.bin b/base/decoder_first.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3e8f9c0af49dd006b774b87efdd21dab1d27852f
--- /dev/null
+++ b/base/decoder_first.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3071377562292da4d34bf9d0ddcfe168fd10c3b81d4689d25c207179d2d58578
+size 243
diff --git a/base/decoder_first.mlmodelc/coremldata.bin b/base/decoder_first.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6cf1f7e086571f7d550b102f7789cd29797f552e
--- /dev/null
+++ b/base/decoder_first.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fbe1879a296cf22a0441826a3028ae2ec63bfc8e9ff019132681d2a93610324
+size 453
diff --git a/base/decoder_first.mlmodelc/metadata.json b/base/decoder_first.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..803aa42112f96bd42a9340d775d136f9f692eabb
--- /dev/null
+++ b/base/decoder_first.mlmodelc/metadata.json
@@ -0,0 +1,106 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "dummy",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.writeState" : 14,
+      "Shape" : 12,
+      "Ios18.linear" : 12,
+      "Identity" : 1,
+      "Ios18.gather" : 12,
+      "Ios18.concat" : 12,
+      "Ios18.sliceUpdate" : 14,
+      "Ios18.cast" : 24,
+      "Ios18.expandDims" : 12,
+      "Ios18.readState" : 14
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 6 × 1 × 448 × 512)",
+        "shortDescription" : "",
+        "shape" : "[6, 1, 448, 512]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 6 × 1 × 448 × 512)",
+        "shortDescription" : "",
+        "shape" : "[6, 1, 448, 512]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 6 × 1 × 1500 × 512)",
+        "shortDescription" : "",
+        "shape" : "[6, 1, 1500, 512]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 6 × 1 × 1500 × 512)",
+        "shortDescription" : "",
+        "shape" : "[6, 1, 1500, 512]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...1500 × 512",
+        "shapeRange" : "[[1, 1], [1, 1500], [512, 512]]",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 512)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1, 512]",
+        "name" : "audio_data",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_first",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/base/decoder_first.mlmodelc/model.mil b/base/decoder_first.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..0d10392624267a646ae587a1913bf492cccdee96
--- /dev/null
+++ b/base/decoder_first.mlmodelc/model.mil
@@ -0,0 +1,369 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, ?, 512]> audio_data, state<tensor<fp16, [6, 1, 448, 512]>> k_cache1, state<tensor<fp16, [6, 1, 1500, 512]>> k_cache2, state<tensor<fp16, [6, 1, 448, 512]>> v_cache1, state<tensor<fp16, [6, 1, 1500, 512]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 512]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [512, 512]]}})))] {
+            tensor<fp16, [1, ?, 512]> dummy = identity(x = audio_data)[name = string("identity_0")];
+            tensor<fp16, [6, 1, 448, 512]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor<fp16, [6, 1, 448, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<fp16, [6, 1, 1500, 512]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<fp16, [512, 512]> var_79_to_fp16 = const()[name = string("op_79_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2752640)))];
+            tensor<fp16, [512]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3276992)))];
+            tensor<fp16, [1, ?, 512]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_79_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [512, 512]> var_83_to_fp16 = const()[name = string("op_83_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3278080)))];
+            tensor<fp16, [512]> var_84_to_fp16 = const()[name = string("op_84_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3802432)))];
+            tensor<fp16, [1, ?, 512]> linear_1_cast_fp16 = linear(bias = var_84_to_fp16, weight = var_83_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [3]> var_86_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_86_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_86_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_86_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [3]> var_86_shape_cast_fp16_to_int16 = cast(dtype = var_86_shape_cast_fp16_to_int16_dtype_0, x = var_86_shape_cast_fp16)[name = string("cast_43")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_86_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_42")];
+            tensor<int32, [1]> expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")];
+            tensor<int32, [4]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
+            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_16_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_16 = read_state(input = k_cache2)[name = string("coreml_update_state_16")];
+            tensor<int32, [3]> var_91_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_91_shape_cast_fp16")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_91_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_91_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_91_shape_cast_fp16_to_uint16 = cast(dtype = var_91_shape_cast_fp16_to_uint16_dtype_0, x = var_91_shape_cast_fp16)[name = string("cast_41")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_91_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_40")];
+            tensor<int32, [1]> expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")];
+            tensor<int32, [4]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)];
+            bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_17_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_17 = read_state(input = v_cache2)[name = string("coreml_update_state_17")];
+            tensor<fp16, [512, 512]> var_113_to_fp16 = const()[name = string("op_113_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3803520)))];
+            tensor<fp16, [1, ?, 512]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_113_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")];
+            tensor<fp16, [512, 512]> var_117_to_fp16 = const()[name = string("op_117_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4327872)))];
+            tensor<fp16, [512]> var_118_to_fp16 = const()[name = string("op_118_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4852224)))];
+            tensor<fp16, [1, ?, 512]> linear_3_cast_fp16 = linear(bias = var_118_to_fp16, weight = var_117_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")];
+            tensor<int32, [3]> var_120_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_120_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_120_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_120_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_120_shape_cast_fp16_to_uint16 = cast(dtype = var_120_shape_cast_fp16_to_uint16_dtype_0, x = var_120_shape_cast_fp16)[name = string("cast_39")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_120_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_38")];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")];
+            tensor<int32, [4]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)];
+            bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_16)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_18_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_18 = read_state(input = k_cache2)[name = string("coreml_update_state_18")];
+            tensor<int32, [3]> var_125_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_125_shape_cast_fp16")];
+            int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)];
+            int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)];
+            bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)];
+            string var_125_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_125_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_125_shape_cast_fp16_to_uint16 = cast(dtype = var_125_shape_cast_fp16_to_uint16_dtype_0, x = var_125_shape_cast_fp16)[name = string("cast_37")];
+            uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_125_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")];
+            string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_36")];
+            tensor<int32, [1]> expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")];
+            tensor<int32, [4]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_17)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_19_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_19 = read_state(input = v_cache2)[name = string("coreml_update_state_19")];
+            tensor<fp16, [512, 512]> var_147_to_fp16 = const()[name = string("op_147_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4853312)))];
+            tensor<fp16, [1, ?, 512]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_147_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")];
+            tensor<fp16, [512, 512]> var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5377664)))];
+            tensor<fp16, [512]> var_152_to_fp16 = const()[name = string("op_152_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5902016)))];
+            tensor<fp16, [1, ?, 512]> linear_5_cast_fp16 = linear(bias = var_152_to_fp16, weight = var_151_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")];
+            tensor<int32, [3]> var_154_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_154_shape_cast_fp16")];
+            int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)];
+            int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)];
+            bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)];
+            string var_154_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_154_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_154_shape_cast_fp16_to_uint16 = cast(dtype = var_154_shape_cast_fp16_to_uint16_dtype_0, x = var_154_shape_cast_fp16)[name = string("cast_35")];
+            uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_154_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")];
+            string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_34")];
+            tensor<int32, [1]> expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")];
+            tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)];
+            bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_18)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_20_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_20 = read_state(input = k_cache2)[name = string("coreml_update_state_20")];
+            tensor<int32, [3]> var_159_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_159_shape_cast_fp16")];
+            int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)];
+            int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)];
+            bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)];
+            string var_159_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_159_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_159_shape_cast_fp16_to_uint16 = cast(dtype = var_159_shape_cast_fp16_to_uint16_dtype_0, x = var_159_shape_cast_fp16)[name = string("cast_33")];
+            uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_159_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")];
+            string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_32")];
+            tensor<int32, [1]> expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")];
+            tensor<int32, [4]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)];
+            bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_19)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_21_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_21 = read_state(input = v_cache2)[name = string("coreml_update_state_21")];
+            tensor<fp16, [512, 512]> var_181_to_fp16 = const()[name = string("op_181_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5903104)))];
+            tensor<fp16, [1, ?, 512]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_181_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [512, 512]> var_185_to_fp16 = const()[name = string("op_185_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6427456)))];
+            tensor<fp16, [512]> var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6951808)))];
+            tensor<fp16, [1, ?, 512]> linear_7_cast_fp16 = linear(bias = var_186_to_fp16, weight = var_185_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")];
+            tensor<int32, [3]> var_188_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_188_shape_cast_fp16")];
+            int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)];
+            int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)];
+            bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)];
+            string var_188_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_188_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_188_shape_cast_fp16_to_uint16 = cast(dtype = var_188_shape_cast_fp16_to_uint16_dtype_0, x = var_188_shape_cast_fp16)[name = string("cast_31")];
+            uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_188_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")];
+            string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_30")];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")];
+            tensor<int32, [4]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)];
+            bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_20)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_22_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_22 = read_state(input = k_cache2)[name = string("coreml_update_state_22")];
+            tensor<int32, [3]> var_193_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_193_shape_cast_fp16")];
+            int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)];
+            int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)];
+            bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)];
+            string var_193_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_193_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_193_shape_cast_fp16_to_uint16 = cast(dtype = var_193_shape_cast_fp16_to_uint16_dtype_0, x = var_193_shape_cast_fp16)[name = string("cast_29")];
+            uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_193_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")];
+            string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_28")];
+            tensor<int32, [1]> expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")];
+            tensor<int32, [4]> concat_26 = const()[name = string("concat_26"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_21)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_23_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_23 = read_state(input = v_cache2)[name = string("coreml_update_state_23")];
+            tensor<fp16, [512, 512]> var_215_to_fp16 = const()[name = string("op_215_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6952896)))];
+            tensor<fp16, [1, ?, 512]> linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_215_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [512, 512]> var_219_to_fp16 = const()[name = string("op_219_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7477248)))];
+            tensor<fp16, [512]> var_220_to_fp16 = const()[name = string("op_220_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8001600)))];
+            tensor<fp16, [1, ?, 512]> linear_9_cast_fp16 = linear(bias = var_220_to_fp16, weight = var_219_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")];
+            tensor<int32, [3]> var_222_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_222_shape_cast_fp16")];
+            int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)];
+            int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)];
+            bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)];
+            string var_222_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_222_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_222_shape_cast_fp16_to_uint16 = cast(dtype = var_222_shape_cast_fp16_to_uint16_dtype_0, x = var_222_shape_cast_fp16)[name = string("cast_27")];
+            uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_222_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")];
+            string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_26")];
+            tensor<int32, [1]> expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")];
+            tensor<int32, [4]> concat_29 = const()[name = string("concat_29"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)];
+            bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_22)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_24_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_24 = read_state(input = k_cache2)[name = string("coreml_update_state_24")];
+            tensor<int32, [3]> var_227_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_227_shape_cast_fp16")];
+            int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)];
+            int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)];
+            bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)];
+            string var_227_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_227_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_227_shape_cast_fp16_to_uint16 = cast(dtype = var_227_shape_cast_fp16_to_uint16_dtype_0, x = var_227_shape_cast_fp16)[name = string("cast_25")];
+            uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_227_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")];
+            string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_24")];
+            tensor<int32, [1]> expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")];
+            tensor<int32, [4]> concat_32 = const()[name = string("concat_32"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)];
+            bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_23)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_25_write_state")];
+            tensor<fp16, [6, 1, 1500, 512]> coreml_update_state_25 = read_state(input = v_cache2)[name = string("coreml_update_state_25")];
+            tensor<fp16, [512, 512]> var_249_to_fp16 = const()[name = string("op_249_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8002688)))];
+            tensor<fp16, [1, ?, 512]> linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_249_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")];
+            tensor<fp16, [512, 512]> var_253_to_fp16 = const()[name = string("op_253_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8527040)))];
+            tensor<fp16, [512]> var_254_to_fp16 = const()[name = string("op_254_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9051392)))];
+            tensor<fp16, [1, ?, 512]> linear_11_cast_fp16 = linear(bias = var_254_to_fp16, weight = var_253_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")];
+            tensor<int32, [3]> var_256_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_256_shape_cast_fp16")];
+            int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)];
+            int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)];
+            bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)];
+            string var_256_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_256_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_256_shape_cast_fp16_to_uint16 = cast(dtype = var_256_shape_cast_fp16_to_uint16_dtype_0, x = var_256_shape_cast_fp16)[name = string("cast_23")];
+            uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_256_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")];
+            string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_22")];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")];
+            tensor<int32, [4]> concat_35 = const()[name = string("concat_35"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)];
+            bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_24)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_26_write_state")];
+            tensor<int32, [3]> var_261_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_261_shape_cast_fp16")];
+            int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)];
+            int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)];
+            bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)];
+            string var_261_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_261_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_261_shape_cast_fp16_to_uint16 = cast(dtype = var_261_shape_cast_fp16_to_uint16_dtype_0, x = var_261_shape_cast_fp16)[name = string("cast_21")];
+            uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_261_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")];
+            string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_20")];
+            tensor<int32, [1]> expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")];
+            tensor<int32, [4]> concat_38 = const()[name = string("concat_38"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
+            bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 1500, 512]> v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_25)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_27_write_state")];
+        } -> (dummy);
+}
\ No newline at end of file
diff --git a/base/decoder_first.mlmodelc/weights/weight.bin b/base/decoder_first.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2a89c367089fa47f07e94e2937e6788cfa061906
--- /dev/null
+++ b/base/decoder_first.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fdbcff86cdfe9e0b8842ad4bc1af8ebbf22082b1d0342a8304023f63dd3663f
+size 9052480
diff --git a/base/decoder_second.mlmodelc/analytics/coremldata.bin b/base/decoder_second.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..85a8ab6e56764bfadaffbcb284f8d74e9b9094c2
--- /dev/null
+++ b/base/decoder_second.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68a3b3426587d83e56d3286cc0b733c9b8a5bff6b1ad6f9e1789a3cb55164455
+size 243
diff --git a/base/decoder_second.mlmodelc/coremldata.bin b/base/decoder_second.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..da712e28a355163fdbe3915834dcbfd38aad3b6d
--- /dev/null
+++ b/base/decoder_second.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6c0272d581c200e0ab4f29c687e4a7b49152e241cea335fa6faa6a430a460b6
+size 487
diff --git a/base/decoder_second.mlmodelc/metadata.json b/base/decoder_second.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..daf955d1563d226d2c0b3d6145074d9fb2f6df38
--- /dev/null
+++ b/base/decoder_second.mlmodelc/metadata.json
@@ -0,0 +1,127 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.linear" : 49,
+      "Ios18.readState" : 14,
+      "Ios18.expandDims" : 7,
+      "Ios18.sub" : 1,
+      "Ios18.matmul" : 24,
+      "Ios18.gelu" : 6,
+      "Ios18.gather" : 9,
+      "Ios18.concat" : 32,
+      "Shape" : 8,
+      "Ios18.add" : 31,
+      "Ios18.sliceUpdate" : 24,
+      "Ios18.sliceByIndex" : 49,
+      "Ios18.layerNorm" : 19,
+      "Ios18.cast" : 16,
+      "Ios18.transpose" : 48,
+      "Ios18.writeState" : 12,
+      "Ios18.reshape" : 48,
+      "Ios18.softmax" : 12,
+      "Ios18.mul" : 24
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 6 × 1 × 448 × 512)",
+        "shortDescription" : "",
+        "shape" : "[6, 1, 448, 512]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 6 × 1 × 448 × 512)",
+        "shortDescription" : "",
+        "shape" : "[6, 1, 448, 512]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 6 × 1 × 1500 × 512)",
+        "shortDescription" : "",
+        "shape" : "[6, 1, 1500, 512]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 6 × 1 × 1500 × 512)",
+        "shortDescription" : "",
+        "shape" : "[6, 1, 1500, 512]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Int32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Int32 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "token_data",
+        "shortDescription" : ""
+      },
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Float16 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "offset_mask",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_second",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/base/decoder_second.mlmodelc/model.mil b/base/decoder_second.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..7ba4b7f3fac4bf29423197cd240f630d692f897c
--- /dev/null
+++ b/base/decoder_second.mlmodelc/model.mil
@@ -0,0 +1,1228 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(state<tensor<fp16, [6, 1, 448, 512]>> k_cache1, state<tensor<fp16, [6, 1, 1500, 512]>> k_cache2, tensor<fp16, [1, ?]> offset_mask, tensor<int32, [1, ?]> token_data, state<tensor<fp16, [6, 1, 448, 512]>> v_cache1, state<tensor<fp16, [6, 1, 1500, 512]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] {
+            tensor<int32, [2]> var_26_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_26_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_26_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_26_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [2]> var_26_shape_cast_fp16_to_int16 = cast(dtype = var_26_shape_cast_fp16_to_int16_dtype_0, x = var_26_shape_cast_fp16)[name = string("cast_82")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_26_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [2]> var_30_shape = shape(x = token_data)[name = string("op_30_shape")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_30_shape_to_uint16_dtype_0 = const()[name = string("op_30_shape_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [2]> var_30_shape_to_uint16 = cast(dtype = var_30_shape_to_uint16_dtype_0, x = var_30_shape)[name = string("cast_80")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_30_shape_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_79")];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_81")];
+            int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")];
+            int32 var_50_axis_0 = const()[name = string("op_50_axis_0"), val = int32(0)];
+            int32 var_50_batch_dims_0 = const()[name = string("op_50_batch_dims_0"), val = int32(0)];
+            bool var_50_validate_indices_0 = const()[name = string("op_50_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [51865, 512]> token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor<fp16, [51865, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, ?, 512]> var_50_cast_fp16 = gather(axis = var_50_axis_0, batch_dims = var_50_batch_dims_0, indices = token_data, validate_indices = var_50_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_50_cast_fp16")];
+            int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)];
+            int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)];
+            bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")];
+            int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(512)];
+            int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)];
+            bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")];
+            tensor<bool, [2]> var_53_end_mask_0 = const()[name = string("op_53_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 512]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [448, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53109888)))];
+            tensor<fp16, [?, ?]> var_53_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_53_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_53_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_3_cast_fp16 = add(x = var_50_cast_fp16, y = var_53_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<fp16, [6, 1, 448, 512]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 512])];
+            tensor<bool, [4]> k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")];
+            tensor<fp16, [6, 1, 448, 512]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 512])];
+            tensor<bool, [4]> v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")];
+            tensor<fp16, [6, 1, 1500, 512]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<int32, [4]> k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 512])];
+            tensor<bool, [4]> k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")];
+            tensor<fp16, [6, 1, 1500, 512]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<int32, [4]> v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 512])];
+            tensor<bool, [4]> v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")];
+            int32 var_76 = const()[name = string("op_76"), val = int32(-1)];
+            tensor<int32, [1]> var_94_axes_0 = const()[name = string("op_94_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53568704)))];
+            tensor<fp16, [512]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53569792)))];
+            fp16 var_82_to_fp16 = const()[name = string("op_82_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 512]> var_94_cast_fp16 = layer_norm(axes = var_94_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_94_cast_fp16")];
+            tensor<fp16, [512, 512]> var_105_to_fp16 = const()[name = string("op_105_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53570880)))];
+            tensor<fp16, [512]> var_106_to_fp16 = const()[name = string("op_106_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54095232)))];
+            tensor<fp16, [1, ?, 512]> linear_0_cast_fp16 = linear(bias = var_106_to_fp16, weight = var_105_to_fp16, x = var_94_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [512, 512]> var_109_to_fp16 = const()[name = string("op_109_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54096320)))];
+            tensor<fp16, [512]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54620672)))];
+            tensor<fp16, [1, ?, 512]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_109_to_fp16, x = var_94_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [512, 512]> var_113_to_fp16 = const()[name = string("op_113_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54621760)))];
+            tensor<fp16, [512]> var_114_to_fp16 = const()[name = string("op_114_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55146112)))];
+            tensor<fp16, [1, ?, 512]> linear_2_cast_fp16 = linear(bias = var_114_to_fp16, weight = var_113_to_fp16, x = var_94_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [3]> var_116_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_116_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_116_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_116_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_116_shape_cast_fp16_to_uint16 = cast(dtype = var_116_shape_cast_fp16_to_uint16_dtype_0, x = var_116_shape_cast_fp16)[name = string("cast_78")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_116_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_77")];
+            int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")];
+            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")];
+            tensor<int32, [1]> expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")];
+            tensor<int32, [1]> concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)];
+            bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")];
+            tensor<int32, [1]> concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)];
+            bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_12_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_12 = read_state(input = k_cache1)[name = string("coreml_update_state_12")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_13_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_13 = read_state(input = v_cache1)[name = string("coreml_update_state_13")];
+            int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)];
+            int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(512)];
+            int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
+            bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")];
+            tensor<int32, [3]> var_132_begin_0 = const()[name = string("op_132_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_132_end_mask_0 = const()[name = string("op_132_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_132_cast_fp16 = slice_by_index(begin = var_132_begin_0, end = concat_10, end_mask = var_132_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_132_cast_fp16")];
+            tensor<int32, [3]> var_135_begin_0 = const()[name = string("op_135_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_135_end_mask_0 = const()[name = string("op_135_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_135_cast_fp16 = slice_by_index(begin = var_135_begin_0, end = concat_10, end_mask = var_135_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_135_cast_fp16")];
+            tensor<int32, [4]> concat_12x = const()[name = string("concat_12x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_145_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_145_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_3_cast_fp16 = mul(x = var_145_cast_fp16, y = const_30_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> concat_13x = const()[name = string("concat_13x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_152_cast_fp16 = reshape(shape = concat_13x, x = var_132_cast_fp16)[name = string("op_152_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> k_5_cast_fp16 = mul(x = var_152_cast_fp16, y = const_31_to_fp16)[name = string("k_5_cast_fp16")];
+            tensor<int32, [4]> concat_14x = const()[name = string("concat_14x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_159_cast_fp16 = reshape(shape = concat_14x, x = var_135_cast_fp16)[name = string("op_159_cast_fp16")];
+            tensor<int32, [4]> var_160 = const()[name = string("op_160"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, ?]> transpose_50 = transpose(perm = transpose_50_perm_0, x = k_5_cast_fp16)[name = string("transpose_118")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_49 = transpose(perm = transpose_49_perm_0, x = q_3_cast_fp16)[name = string("transpose_119")];
+            tensor<fp16, [1, 8, ?, ?]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_49, y = transpose_50)[name = string("qk_1_cast_fp16")];
+            int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")];
+            tensor<int32, [2]> var_163_begin_0 = const()[name = string("op_163_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_163_end_mask_0 = const()[name = string("op_163_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 448]> mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor<fp16, [448, 448]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55147200)))];
+            tensor<fp16, [?, 448]> var_163_cast_fp16 = slice_by_index(begin = var_163_begin_0, end = concat_15, end_mask = var_163_end_mask_0, x = mask_to_fp16)[name = string("op_163_cast_fp16")];
+            int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)];
+            int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)];
+            bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")];
+            tensor<int32, [2]> var_164_begin_0 = const()[name = string("op_164_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_164_end_mask_0 = const()[name = string("op_164_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_164_cast_fp16 = slice_by_index(begin = var_164_begin_0, end = concat_16, end_mask = var_164_end_mask_0, x = var_163_cast_fp16)[name = string("op_164_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_164_cast_fp16)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> var_167_cast_fp16 = softmax(axis = var_76, x = qk_3_cast_fp16)[name = string("op_167_cast_fp16")];
+            bool var_169_transpose_x_0 = const()[name = string("op_169_transpose_x_0"), val = bool(false)];
+            bool var_169_transpose_y_0 = const()[name = string("op_169_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, ?, 64]> v_5_cast_fp16 = transpose(perm = var_160, x = var_159_cast_fp16)[name = string("transpose_120")];
+            tensor<fp16, [1, 8, ?, 64]> var_169_cast_fp16 = matmul(transpose_x = var_169_transpose_x_0, transpose_y = var_169_transpose_y_0, x = var_167_cast_fp16, y = v_5_cast_fp16)[name = string("op_169_cast_fp16")];
+            tensor<int32, [4]> var_170 = const()[name = string("op_170"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_17x = const()[name = string("concat_17x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_171_cast_fp16 = transpose(perm = var_170, x = var_169_cast_fp16)[name = string("transpose_117")];
+            tensor<fp16, [1, ?, 512]> x_7_cast_fp16 = reshape(shape = concat_17x, x = var_171_cast_fp16)[name = string("x_7_cast_fp16")];
+            tensor<fp16, [512, 512]> var_175_to_fp16 = const()[name = string("op_175_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55548672)))];
+            tensor<fp16, [512]> var_176_to_fp16 = const()[name = string("op_176_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56073024)))];
+            tensor<fp16, [1, ?, 512]> linear_3_cast_fp16 = linear(bias = var_176_to_fp16, weight = var_175_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")];
+            tensor<int32, [1]> var_183_axes_0 = const()[name = string("op_183_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56074112)))];
+            tensor<fp16, [512]> blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56075200)))];
+            tensor<fp16, [1, ?, 512]> var_183_cast_fp16 = layer_norm(axes = var_183_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_183_cast_fp16")];
+            tensor<fp16, [512, 512]> var_192_to_fp16 = const()[name = string("op_192_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56076288)))];
+            tensor<fp16, [512]> var_193_to_fp16 = const()[name = string("op_193_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56600640)))];
+            tensor<fp16, [1, ?, 512]> linear_4_cast_fp16 = linear(bias = var_193_to_fp16, weight = var_192_to_fp16, x = var_183_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor<fp16, [1, 1500, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56601728)))];
+            tensor<fp16, [1, 1500, 512]> k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_22x = const()[name = string("concat_22x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_213_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_213_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_7_cast_fp16 = mul(x = var_213_cast_fp16, y = const_32_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_219 = const()[name = string("op_219"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_220_cast_fp16 = reshape(shape = var_219, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_220_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_9_cast_fp16 = mul(x = var_220_cast_fp16, y = const_33_to_fp16)[name = string("k_9_cast_fp16")];
+            tensor<int32, [4]> var_226 = const()[name = string("op_226"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_227_cast_fp16 = reshape(shape = var_226, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_227_cast_fp16")];
+            tensor<int32, [4]> var_228 = const()[name = string("op_228"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_52 = transpose(perm = transpose_52_perm_0, x = k_9_cast_fp16)[name = string("transpose_114")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_51 = transpose(perm = transpose_51_perm_0, x = q_7_cast_fp16)[name = string("transpose_115")];
+            tensor<fp16, [1, 8, ?, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_51, y = transpose_52)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 8, ?, 1500]> var_232_cast_fp16 = softmax(axis = var_76, x = qk_5_cast_fp16)[name = string("op_232_cast_fp16")];
+            bool var_234_transpose_x_0 = const()[name = string("op_234_transpose_x_0"), val = bool(false)];
+            bool var_234_transpose_y_0 = const()[name = string("op_234_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_9_cast_fp16 = transpose(perm = var_228, x = var_227_cast_fp16)[name = string("transpose_116")];
+            tensor<fp16, [1, 8, ?, 64]> var_234_cast_fp16 = matmul(transpose_x = var_234_transpose_x_0, transpose_y = var_234_transpose_y_0, x = var_232_cast_fp16, y = v_9_cast_fp16)[name = string("op_234_cast_fp16")];
+            tensor<int32, [4]> var_235 = const()[name = string("op_235"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_23x = const()[name = string("concat_23x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_236_cast_fp16 = transpose(perm = var_235, x = var_234_cast_fp16)[name = string("transpose_113")];
+            tensor<fp16, [1, ?, 512]> x_13_cast_fp16 = reshape(shape = concat_23x, x = var_236_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<fp16, [512, 512]> var_240_to_fp16 = const()[name = string("op_240_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58137792)))];
+            tensor<fp16, [512]> var_241_to_fp16 = const()[name = string("op_241_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58662144)))];
+            tensor<fp16, [1, ?, 512]> linear_5_cast_fp16 = linear(bias = var_241_to_fp16, weight = var_240_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")];
+            tensor<int32, [1]> var_248_axes_0 = const()[name = string("op_248_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58663232)))];
+            tensor<fp16, [512]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58664320)))];
+            tensor<fp16, [1, ?, 512]> var_248_cast_fp16 = layer_norm(axes = var_248_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_248_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58665408)))];
+            tensor<fp16, [2048]> var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60762624)))];
+            tensor<fp16, [1, ?, 2048]> linear_6_cast_fp16 = linear(bias = var_258_to_fp16, weight = var_257_to_fp16, x = var_248_cast_fp16)[name = string("linear_6_cast_fp16")];
+            string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 2048]> x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_263_to_fp16 = const()[name = string("op_263_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60766784)))];
+            tensor<fp16, [512]> var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62864000)))];
+            tensor<fp16, [1, ?, 512]> linear_7_cast_fp16 = linear(bias = var_264_to_fp16, weight = var_263_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 512])];
+            tensor<bool, [4]> k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_12)[name = string("k_cache_5_cast_fp16")];
+            tensor<int32, [4]> v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 512])];
+            tensor<bool, [4]> v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_13)[name = string("v_cache_5_cast_fp16")];
+            tensor<int32, [4]> k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 512])];
+            tensor<bool, [4]> k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")];
+            tensor<int32, [4]> v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 512])];
+            tensor<bool, [4]> v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")];
+            int32 var_287 = const()[name = string("op_287"), val = int32(-1)];
+            tensor<int32, [1]> var_305_axes_0 = const()[name = string("op_305_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62865088)))];
+            tensor<fp16, [512]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62866176)))];
+            fp16 var_293_to_fp16 = const()[name = string("op_293_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 512]> var_305_cast_fp16 = layer_norm(axes = var_305_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_305_cast_fp16")];
+            tensor<fp16, [512, 512]> var_316_to_fp16 = const()[name = string("op_316_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62867264)))];
+            tensor<fp16, [512]> var_317_to_fp16 = const()[name = string("op_317_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63391616)))];
+            tensor<fp16, [1, ?, 512]> linear_8_cast_fp16 = linear(bias = var_317_to_fp16, weight = var_316_to_fp16, x = var_305_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [512, 512]> var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63392704)))];
+            tensor<fp16, [1, ?, 512]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_320_to_fp16, x = var_305_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [512, 512]> var_324_to_fp16 = const()[name = string("op_324_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63917056)))];
+            tensor<fp16, [512]> var_325_to_fp16 = const()[name = string("op_325_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64441408)))];
+            tensor<fp16, [1, ?, 512]> linear_10_cast_fp16 = linear(bias = var_325_to_fp16, weight = var_324_to_fp16, x = var_305_cast_fp16)[name = string("linear_10_cast_fp16")];
+            tensor<int32, [3]> var_327_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_327_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_327_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_327_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_327_shape_cast_fp16_to_uint16 = cast(dtype = var_327_shape_cast_fp16_to_uint16_dtype_0, x = var_327_shape_cast_fp16)[name = string("cast_76")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_327_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_75")];
+            int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")];
+            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")];
+            tensor<int32, [1]> concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor<int32, [1]>([1])];
+            int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
+            bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_12)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_14 = read_state(input = k_cache1)[name = string("coreml_update_state_14")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_13)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_15 = read_state(input = v_cache1)[name = string("coreml_update_state_15")];
+            int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)];
+            int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(512)];
+            int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)];
+            bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")];
+            tensor<int32, [3]> var_343_begin_0 = const()[name = string("op_343_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_343_end_mask_0 = const()[name = string("op_343_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = concat_32, end_mask = var_343_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_343_cast_fp16")];
+            tensor<int32, [3]> var_346_begin_0 = const()[name = string("op_346_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_346_end_mask_0 = const()[name = string("op_346_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = concat_32, end_mask = var_346_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_346_cast_fp16")];
+            tensor<int32, [4]> concat_34x = const()[name = string("concat_34x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_356_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_356_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_11_cast_fp16 = mul(x = var_356_cast_fp16, y = const_34_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> concat_35x = const()[name = string("concat_35x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_363_cast_fp16 = reshape(shape = concat_35x, x = var_343_cast_fp16)[name = string("op_363_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> k_15_cast_fp16 = mul(x = var_363_cast_fp16, y = const_35_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> concat_36x = const()[name = string("concat_36x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_370_cast_fp16 = reshape(shape = concat_36x, x = var_346_cast_fp16)[name = string("op_370_cast_fp16")];
+            tensor<int32, [4]> var_371 = const()[name = string("op_371"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, ?]> transpose_54 = transpose(perm = transpose_54_perm_0, x = k_15_cast_fp16)[name = string("transpose_110")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_53 = transpose(perm = transpose_53_perm_0, x = q_11_cast_fp16)[name = string("transpose_111")];
+            tensor<fp16, [1, 8, ?, ?]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_53, y = transpose_54)[name = string("qk_7_cast_fp16")];
+            int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)];
+            int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)];
+            bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")];
+            tensor<int32, [2]> var_374_begin_0 = const()[name = string("op_374_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_374_end_mask_0 = const()[name = string("op_374_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = concat_37, end_mask = var_374_end_mask_0, x = mask_to_fp16)[name = string("op_374_cast_fp16")];
+            int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)];
+            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
+            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")];
+            tensor<int32, [2]> var_375_begin_0 = const()[name = string("op_375_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_375_end_mask_0 = const()[name = string("op_375_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_375_cast_fp16 = slice_by_index(begin = var_375_begin_0, end = concat_38, end_mask = var_375_end_mask_0, x = var_374_cast_fp16)[name = string("op_375_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_375_cast_fp16)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> var_378_cast_fp16 = softmax(axis = var_287, x = qk_9_cast_fp16)[name = string("op_378_cast_fp16")];
+            bool var_380_transpose_x_0 = const()[name = string("op_380_transpose_x_0"), val = bool(false)];
+            bool var_380_transpose_y_0 = const()[name = string("op_380_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, ?, 64]> v_15_cast_fp16 = transpose(perm = var_371, x = var_370_cast_fp16)[name = string("transpose_112")];
+            tensor<fp16, [1, 8, ?, 64]> var_380_cast_fp16 = matmul(transpose_x = var_380_transpose_x_0, transpose_y = var_380_transpose_y_0, x = var_378_cast_fp16, y = v_15_cast_fp16)[name = string("op_380_cast_fp16")];
+            tensor<int32, [4]> var_381 = const()[name = string("op_381"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_39x = const()[name = string("concat_39x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_382_cast_fp16 = transpose(perm = var_381, x = var_380_cast_fp16)[name = string("transpose_109")];
+            tensor<fp16, [1, ?, 512]> x_25_cast_fp16 = reshape(shape = concat_39x, x = var_382_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<fp16, [512, 512]> var_386_to_fp16 = const()[name = string("op_386_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64442496)))];
+            tensor<fp16, [512]> var_387_to_fp16 = const()[name = string("op_387_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64966848)))];
+            tensor<fp16, [1, ?, 512]> linear_11_cast_fp16 = linear(bias = var_387_to_fp16, weight = var_386_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")];
+            tensor<int32, [1]> var_394_axes_0 = const()[name = string("op_394_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64967936)))];
+            tensor<fp16, [512]> blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64969024)))];
+            tensor<fp16, [1, ?, 512]> var_394_cast_fp16 = layer_norm(axes = var_394_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_394_cast_fp16")];
+            tensor<fp16, [512, 512]> var_403_to_fp16 = const()[name = string("op_403_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64970112)))];
+            tensor<fp16, [512]> var_404_to_fp16 = const()[name = string("op_404_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65494464)))];
+            tensor<fp16, [1, ?, 512]> linear_12_cast_fp16 = linear(bias = var_404_to_fp16, weight = var_403_to_fp16, x = var_394_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<int32, [3]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_42 = const()[name = string("concat_42"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_44x = const()[name = string("concat_44x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_424_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_424_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_36_to_fp16 = const()[name = string("const_36_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_15_cast_fp16 = mul(x = var_424_cast_fp16, y = const_36_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_430 = const()[name = string("op_430"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_431_cast_fp16 = reshape(shape = var_430, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_431_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_37_to_fp16 = const()[name = string("const_37_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_19_cast_fp16 = mul(x = var_431_cast_fp16, y = const_37_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_437 = const()[name = string("op_437"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_438_cast_fp16 = reshape(shape = var_437, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_438_cast_fp16")];
+            tensor<int32, [4]> var_439 = const()[name = string("op_439"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_56 = transpose(perm = transpose_56_perm_0, x = k_19_cast_fp16)[name = string("transpose_106")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_55 = transpose(perm = transpose_55_perm_0, x = q_15_cast_fp16)[name = string("transpose_107")];
+            tensor<fp16, [1, 8, ?, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_55, y = transpose_56)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 8, ?, 1500]> var_443_cast_fp16 = softmax(axis = var_287, x = qk_11_cast_fp16)[name = string("op_443_cast_fp16")];
+            bool var_445_transpose_x_0 = const()[name = string("op_445_transpose_x_0"), val = bool(false)];
+            bool var_445_transpose_y_0 = const()[name = string("op_445_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_439, x = var_438_cast_fp16)[name = string("transpose_108")];
+            tensor<fp16, [1, 8, ?, 64]> var_445_cast_fp16 = matmul(transpose_x = var_445_transpose_x_0, transpose_y = var_445_transpose_y_0, x = var_443_cast_fp16, y = v_19_cast_fp16)[name = string("op_445_cast_fp16")];
+            tensor<int32, [4]> var_446 = const()[name = string("op_446"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_45x = const()[name = string("concat_45x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_447_cast_fp16 = transpose(perm = var_446, x = var_445_cast_fp16)[name = string("transpose_105")];
+            tensor<fp16, [1, ?, 512]> x_31_cast_fp16 = reshape(shape = concat_45x, x = var_447_cast_fp16)[name = string("x_31_cast_fp16")];
+            tensor<fp16, [512, 512]> var_451_to_fp16 = const()[name = string("op_451_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65495552)))];
+            tensor<fp16, [512]> var_452_to_fp16 = const()[name = string("op_452_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66019904)))];
+            tensor<fp16, [1, ?, 512]> linear_13_cast_fp16 = linear(bias = var_452_to_fp16, weight = var_451_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")];
+            tensor<int32, [1]> var_459_axes_0 = const()[name = string("op_459_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66020992)))];
+            tensor<fp16, [512]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66022080)))];
+            tensor<fp16, [1, ?, 512]> var_459_cast_fp16 = layer_norm(axes = var_459_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_459_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_468_to_fp16 = const()[name = string("op_468_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66023168)))];
+            tensor<fp16, [2048]> var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68120384)))];
+            tensor<fp16, [1, ?, 2048]> linear_14_cast_fp16 = linear(bias = var_469_to_fp16, weight = var_468_to_fp16, x = var_459_cast_fp16)[name = string("linear_14_cast_fp16")];
+            string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 2048]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_474_to_fp16 = const()[name = string("op_474_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68124544)))];
+            tensor<fp16, [512]> var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70221760)))];
+            tensor<fp16, [1, ?, 512]> linear_15_cast_fp16 = linear(bias = var_475_to_fp16, weight = var_474_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")];
+            tensor<int32, [4]> k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 512])];
+            tensor<bool, [4]> k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_14)[name = string("k_cache_9_cast_fp16")];
+            tensor<int32, [4]> v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 512])];
+            tensor<bool, [4]> v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_15)[name = string("v_cache_9_cast_fp16")];
+            tensor<int32, [4]> k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 512])];
+            tensor<bool, [4]> k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")];
+            tensor<int32, [4]> v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 512])];
+            tensor<bool, [4]> v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")];
+            int32 var_498 = const()[name = string("op_498"), val = int32(-1)];
+            tensor<int32, [1]> var_516_axes_0 = const()[name = string("op_516_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70222848)))];
+            tensor<fp16, [512]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70223936)))];
+            fp16 var_504_to_fp16 = const()[name = string("op_504_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 512]> var_516_cast_fp16 = layer_norm(axes = var_516_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_516_cast_fp16")];
+            tensor<fp16, [512, 512]> var_527_to_fp16 = const()[name = string("op_527_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70225024)))];
+            tensor<fp16, [512]> var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70749376)))];
+            tensor<fp16, [1, ?, 512]> linear_16_cast_fp16 = linear(bias = var_528_to_fp16, weight = var_527_to_fp16, x = var_516_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [512, 512]> var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70750464)))];
+            tensor<fp16, [1, ?, 512]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_531_to_fp16, x = var_516_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [512, 512]> var_535_to_fp16 = const()[name = string("op_535_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71274816)))];
+            tensor<fp16, [512]> var_536_to_fp16 = const()[name = string("op_536_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71799168)))];
+            tensor<fp16, [1, ?, 512]> linear_18_cast_fp16 = linear(bias = var_536_to_fp16, weight = var_535_to_fp16, x = var_516_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<int32, [3]> var_538_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_538_shape_cast_fp16")];
+            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
+            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
+            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
+            string var_538_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_538_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_538_shape_cast_fp16_to_uint16 = cast(dtype = var_538_shape_cast_fp16_to_uint16_dtype_0, x = var_538_shape_cast_fp16)[name = string("cast_74")];
+            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_538_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
+            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_73")];
+            int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")];
+            tensor<int32, [1]> expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([2])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")];
+            tensor<int32, [1]> concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)];
+            bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_14)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_16_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_16 = read_state(input = k_cache1)[name = string("coreml_update_state_16")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_15)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_17_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_17 = read_state(input = v_cache1)[name = string("coreml_update_state_17")];
+            int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)];
+            int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(512)];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")];
+            tensor<int32, [3]> var_554_begin_0 = const()[name = string("op_554_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_554_end_mask_0 = const()[name = string("op_554_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = concat_54, end_mask = var_554_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_554_cast_fp16")];
+            tensor<int32, [3]> var_557_begin_0 = const()[name = string("op_557_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_557_end_mask_0 = const()[name = string("op_557_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_557_cast_fp16 = slice_by_index(begin = var_557_begin_0, end = concat_54, end_mask = var_557_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_557_cast_fp16")];
+            tensor<int32, [4]> concat_56x = const()[name = string("concat_56x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_567_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_567_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_38_to_fp16 = const()[name = string("const_38_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_19_cast_fp16 = mul(x = var_567_cast_fp16, y = const_38_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> concat_57x = const()[name = string("concat_57x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_574_cast_fp16 = reshape(shape = concat_57x, x = var_554_cast_fp16)[name = string("op_574_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_39_to_fp16 = const()[name = string("const_39_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> k_25_cast_fp16 = mul(x = var_574_cast_fp16, y = const_39_to_fp16)[name = string("k_25_cast_fp16")];
+            tensor<int32, [4]> concat_58x = const()[name = string("concat_58x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_581_cast_fp16 = reshape(shape = concat_58x, x = var_557_cast_fp16)[name = string("op_581_cast_fp16")];
+            tensor<int32, [4]> var_582 = const()[name = string("op_582"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, ?]> transpose_58 = transpose(perm = transpose_58_perm_0, x = k_25_cast_fp16)[name = string("transpose_102")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_57 = transpose(perm = transpose_57_perm_0, x = q_19_cast_fp16)[name = string("transpose_103")];
+            tensor<fp16, [1, 8, ?, ?]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_57, y = transpose_58)[name = string("qk_13_cast_fp16")];
+            int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)];
+            int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)];
+            bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")];
+            tensor<int32, [2]> var_585_begin_0 = const()[name = string("op_585_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_585_end_mask_0 = const()[name = string("op_585_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_585_cast_fp16 = slice_by_index(begin = var_585_begin_0, end = concat_59, end_mask = var_585_end_mask_0, x = mask_to_fp16)[name = string("op_585_cast_fp16")];
+            int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")];
+            tensor<int32, [2]> var_586_begin_0 = const()[name = string("op_586_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_586_end_mask_0 = const()[name = string("op_586_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = concat_60, end_mask = var_586_end_mask_0, x = var_585_cast_fp16)[name = string("op_586_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_586_cast_fp16)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> var_589_cast_fp16 = softmax(axis = var_498, x = qk_15_cast_fp16)[name = string("op_589_cast_fp16")];
+            bool var_591_transpose_x_0 = const()[name = string("op_591_transpose_x_0"), val = bool(false)];
+            bool var_591_transpose_y_0 = const()[name = string("op_591_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, ?, 64]> v_25_cast_fp16 = transpose(perm = var_582, x = var_581_cast_fp16)[name = string("transpose_104")];
+            tensor<fp16, [1, 8, ?, 64]> var_591_cast_fp16 = matmul(transpose_x = var_591_transpose_x_0, transpose_y = var_591_transpose_y_0, x = var_589_cast_fp16, y = v_25_cast_fp16)[name = string("op_591_cast_fp16")];
+            tensor<int32, [4]> var_592 = const()[name = string("op_592"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_61x = const()[name = string("concat_61x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_593_cast_fp16 = transpose(perm = var_592, x = var_591_cast_fp16)[name = string("transpose_101")];
+            tensor<fp16, [1, ?, 512]> x_43_cast_fp16 = reshape(shape = concat_61x, x = var_593_cast_fp16)[name = string("x_43_cast_fp16")];
+            tensor<fp16, [512, 512]> var_597_to_fp16 = const()[name = string("op_597_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71800256)))];
+            tensor<fp16, [512]> var_598_to_fp16 = const()[name = string("op_598_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72324608)))];
+            tensor<fp16, [1, ?, 512]> linear_19_cast_fp16 = linear(bias = var_598_to_fp16, weight = var_597_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")];
+            tensor<int32, [1]> var_605_axes_0 = const()[name = string("op_605_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72325696)))];
+            tensor<fp16, [512]> blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72326784)))];
+            tensor<fp16, [1, ?, 512]> var_605_cast_fp16 = layer_norm(axes = var_605_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_605_cast_fp16")];
+            tensor<fp16, [512, 512]> var_614_to_fp16 = const()[name = string("op_614_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72327872)))];
+            tensor<fp16, [512]> var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72852224)))];
+            tensor<fp16, [1, ?, 512]> linear_20_cast_fp16 = linear(bias = var_615_to_fp16, weight = var_614_to_fp16, x = var_605_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [3]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_63 = const()[name = string("concat_63"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_64 = const()[name = string("concat_64"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_66x = const()[name = string("concat_66x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_635_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_635_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_40_to_fp16 = const()[name = string("const_40_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_23_cast_fp16 = mul(x = var_635_cast_fp16, y = const_40_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_641 = const()[name = string("op_641"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_642_cast_fp16 = reshape(shape = var_641, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_642_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_29_cast_fp16 = mul(x = var_642_cast_fp16, y = const_41_to_fp16)[name = string("k_29_cast_fp16")];
+            tensor<int32, [4]> var_648 = const()[name = string("op_648"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_649_cast_fp16 = reshape(shape = var_648, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_649_cast_fp16")];
+            tensor<int32, [4]> var_650 = const()[name = string("op_650"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_60 = transpose(perm = transpose_60_perm_0, x = k_29_cast_fp16)[name = string("transpose_98")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_59 = transpose(perm = transpose_59_perm_0, x = q_23_cast_fp16)[name = string("transpose_99")];
+            tensor<fp16, [1, 8, ?, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_59, y = transpose_60)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 8, ?, 1500]> var_654_cast_fp16 = softmax(axis = var_498, x = qk_17_cast_fp16)[name = string("op_654_cast_fp16")];
+            bool var_656_transpose_x_0 = const()[name = string("op_656_transpose_x_0"), val = bool(false)];
+            bool var_656_transpose_y_0 = const()[name = string("op_656_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_29_cast_fp16 = transpose(perm = var_650, x = var_649_cast_fp16)[name = string("transpose_100")];
+            tensor<fp16, [1, 8, ?, 64]> var_656_cast_fp16 = matmul(transpose_x = var_656_transpose_x_0, transpose_y = var_656_transpose_y_0, x = var_654_cast_fp16, y = v_29_cast_fp16)[name = string("op_656_cast_fp16")];
+            tensor<int32, [4]> var_657 = const()[name = string("op_657"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_67x = const()[name = string("concat_67x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_658_cast_fp16 = transpose(perm = var_657, x = var_656_cast_fp16)[name = string("transpose_97")];
+            tensor<fp16, [1, ?, 512]> x_49_cast_fp16 = reshape(shape = concat_67x, x = var_658_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<fp16, [512, 512]> var_662_to_fp16 = const()[name = string("op_662_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72853312)))];
+            tensor<fp16, [512]> var_663_to_fp16 = const()[name = string("op_663_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73377664)))];
+            tensor<fp16, [1, ?, 512]> linear_21_cast_fp16 = linear(bias = var_663_to_fp16, weight = var_662_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")];
+            tensor<int32, [1]> var_670_axes_0 = const()[name = string("op_670_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73378752)))];
+            tensor<fp16, [512]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73379840)))];
+            tensor<fp16, [1, ?, 512]> var_670_cast_fp16 = layer_norm(axes = var_670_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_670_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_679_to_fp16 = const()[name = string("op_679_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73380928)))];
+            tensor<fp16, [2048]> var_680_to_fp16 = const()[name = string("op_680_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75478144)))];
+            tensor<fp16, [1, ?, 2048]> linear_22_cast_fp16 = linear(bias = var_680_to_fp16, weight = var_679_to_fp16, x = var_670_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 2048]> x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_685_to_fp16 = const()[name = string("op_685_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75482304)))];
+            tensor<fp16, [512]> var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77579520)))];
+            tensor<fp16, [1, ?, 512]> linear_23_cast_fp16 = linear(bias = var_686_to_fp16, weight = var_685_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")];
+            tensor<int32, [4]> k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 512])];
+            tensor<bool, [4]> k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_16)[name = string("k_cache_13_cast_fp16")];
+            tensor<int32, [4]> v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 512])];
+            tensor<bool, [4]> v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_17)[name = string("v_cache_13_cast_fp16")];
+            tensor<int32, [4]> k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 512])];
+            tensor<bool, [4]> k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")];
+            tensor<int32, [4]> v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 512])];
+            tensor<bool, [4]> v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")];
+            int32 var_709 = const()[name = string("op_709"), val = int32(-1)];
+            tensor<int32, [1]> var_727_axes_0 = const()[name = string("op_727_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77580608)))];
+            tensor<fp16, [512]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77581696)))];
+            fp16 var_715_to_fp16 = const()[name = string("op_715_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 512]> var_727_cast_fp16 = layer_norm(axes = var_727_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_727_cast_fp16")];
+            tensor<fp16, [512, 512]> var_738_to_fp16 = const()[name = string("op_738_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77582784)))];
+            tensor<fp16, [512]> var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78107136)))];
+            tensor<fp16, [1, ?, 512]> linear_24_cast_fp16 = linear(bias = var_739_to_fp16, weight = var_738_to_fp16, x = var_727_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [512, 512]> var_742_to_fp16 = const()[name = string("op_742_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78108224)))];
+            tensor<fp16, [1, ?, 512]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_742_to_fp16, x = var_727_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [512, 512]> var_746_to_fp16 = const()[name = string("op_746_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78632576)))];
+            tensor<fp16, [512]> var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79156928)))];
+            tensor<fp16, [1, ?, 512]> linear_26_cast_fp16 = linear(bias = var_747_to_fp16, weight = var_746_to_fp16, x = var_727_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [3]> var_749_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_749_shape_cast_fp16")];
+            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
+            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
+            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
+            string var_749_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_749_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_749_shape_cast_fp16_to_uint16 = cast(dtype = var_749_shape_cast_fp16_to_uint16_dtype_0, x = var_749_shape_cast_fp16)[name = string("cast_72")];
+            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_749_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
+            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_71")];
+            int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")];
+            tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")];
+            tensor<int32, [1]> concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor<int32, [1]>([3])];
+            int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)];
+            bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")];
+            tensor<int32, [1]> concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)];
+            bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_16)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_18_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_18 = read_state(input = k_cache1)[name = string("coreml_update_state_18")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_17)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_19_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_19 = read_state(input = v_cache1)[name = string("coreml_update_state_19")];
+            int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)];
+            int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(512)];
+            int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)];
+            bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")];
+            tensor<int32, [3]> var_765_begin_0 = const()[name = string("op_765_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_765_end_mask_0 = const()[name = string("op_765_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_765_cast_fp16 = slice_by_index(begin = var_765_begin_0, end = concat_76, end_mask = var_765_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_765_cast_fp16")];
+            tensor<int32, [3]> var_768_begin_0 = const()[name = string("op_768_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_768_end_mask_0 = const()[name = string("op_768_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_768_cast_fp16 = slice_by_index(begin = var_768_begin_0, end = concat_76, end_mask = var_768_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_768_cast_fp16")];
+            tensor<int32, [4]> concat_78x = const()[name = string("concat_78x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_778_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_778_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_27_cast_fp16 = mul(x = var_778_cast_fp16, y = const_42_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> concat_79x = const()[name = string("concat_79x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_785_cast_fp16 = reshape(shape = concat_79x, x = var_765_cast_fp16)[name = string("op_785_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> k_35_cast_fp16 = mul(x = var_785_cast_fp16, y = const_43_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> concat_80x = const()[name = string("concat_80x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_792_cast_fp16 = reshape(shape = concat_80x, x = var_768_cast_fp16)[name = string("op_792_cast_fp16")];
+            tensor<int32, [4]> var_793 = const()[name = string("op_793"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, ?]> transpose_62 = transpose(perm = transpose_62_perm_0, x = k_35_cast_fp16)[name = string("transpose_94")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_61 = transpose(perm = transpose_61_perm_0, x = q_27_cast_fp16)[name = string("transpose_95")];
+            tensor<fp16, [1, 8, ?, ?]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_61, y = transpose_62)[name = string("qk_19_cast_fp16")];
+            int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)];
+            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
+            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")];
+            tensor<int32, [2]> var_796_begin_0 = const()[name = string("op_796_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_796_end_mask_0 = const()[name = string("op_796_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_796_cast_fp16 = slice_by_index(begin = var_796_begin_0, end = concat_81, end_mask = var_796_end_mask_0, x = mask_to_fp16)[name = string("op_796_cast_fp16")];
+            int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)];
+            int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)];
+            bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")];
+            tensor<int32, [2]> var_797_begin_0 = const()[name = string("op_797_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_797_end_mask_0 = const()[name = string("op_797_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_797_cast_fp16 = slice_by_index(begin = var_797_begin_0, end = concat_82, end_mask = var_797_end_mask_0, x = var_796_cast_fp16)[name = string("op_797_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_797_cast_fp16)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> var_800_cast_fp16 = softmax(axis = var_709, x = qk_21_cast_fp16)[name = string("op_800_cast_fp16")];
+            bool var_802_transpose_x_0 = const()[name = string("op_802_transpose_x_0"), val = bool(false)];
+            bool var_802_transpose_y_0 = const()[name = string("op_802_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, ?, 64]> v_35_cast_fp16 = transpose(perm = var_793, x = var_792_cast_fp16)[name = string("transpose_96")];
+            tensor<fp16, [1, 8, ?, 64]> var_802_cast_fp16 = matmul(transpose_x = var_802_transpose_x_0, transpose_y = var_802_transpose_y_0, x = var_800_cast_fp16, y = v_35_cast_fp16)[name = string("op_802_cast_fp16")];
+            tensor<int32, [4]> var_803 = const()[name = string("op_803"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_83x = const()[name = string("concat_83x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_804_cast_fp16 = transpose(perm = var_803, x = var_802_cast_fp16)[name = string("transpose_93")];
+            tensor<fp16, [1, ?, 512]> x_61_cast_fp16 = reshape(shape = concat_83x, x = var_804_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<fp16, [512, 512]> var_808_to_fp16 = const()[name = string("op_808_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79158016)))];
+            tensor<fp16, [512]> var_809_to_fp16 = const()[name = string("op_809_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79682368)))];
+            tensor<fp16, [1, ?, 512]> linear_27_cast_fp16 = linear(bias = var_809_to_fp16, weight = var_808_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")];
+            tensor<int32, [1]> var_816_axes_0 = const()[name = string("op_816_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79683456)))];
+            tensor<fp16, [512]> blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79684544)))];
+            tensor<fp16, [1, ?, 512]> var_816_cast_fp16 = layer_norm(axes = var_816_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_816_cast_fp16")];
+            tensor<fp16, [512, 512]> var_825_to_fp16 = const()[name = string("op_825_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79685632)))];
+            tensor<fp16, [512]> var_826_to_fp16 = const()[name = string("op_826_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80209984)))];
+            tensor<fp16, [1, ?, 512]> linear_28_cast_fp16 = linear(bias = var_826_to_fp16, weight = var_825_to_fp16, x = var_816_cast_fp16)[name = string("linear_28_cast_fp16")];
+            tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_87 = const()[name = string("concat_87"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_88x = const()[name = string("concat_88x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_846_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_846_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_44_to_fp16 = const()[name = string("const_44_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_31_cast_fp16 = mul(x = var_846_cast_fp16, y = const_44_to_fp16)[name = string("q_31_cast_fp16")];
+            tensor<int32, [4]> var_852 = const()[name = string("op_852"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_853_cast_fp16 = reshape(shape = var_852, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_853_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_45_to_fp16 = const()[name = string("const_45_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_39_cast_fp16 = mul(x = var_853_cast_fp16, y = const_45_to_fp16)[name = string("k_39_cast_fp16")];
+            tensor<int32, [4]> var_859 = const()[name = string("op_859"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_860_cast_fp16 = reshape(shape = var_859, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_860_cast_fp16")];
+            tensor<int32, [4]> var_861 = const()[name = string("op_861"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)];
+            bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_64 = transpose(perm = transpose_64_perm_0, x = k_39_cast_fp16)[name = string("transpose_90")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_63 = transpose(perm = transpose_63_perm_0, x = q_31_cast_fp16)[name = string("transpose_91")];
+            tensor<fp16, [1, 8, ?, 1500]> qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_63, y = transpose_64)[name = string("qk_23_cast_fp16")];
+            tensor<fp16, [1, 8, ?, 1500]> var_865_cast_fp16 = softmax(axis = var_709, x = qk_23_cast_fp16)[name = string("op_865_cast_fp16")];
+            bool var_867_transpose_x_0 = const()[name = string("op_867_transpose_x_0"), val = bool(false)];
+            bool var_867_transpose_y_0 = const()[name = string("op_867_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_861, x = var_860_cast_fp16)[name = string("transpose_92")];
+            tensor<fp16, [1, 8, ?, 64]> var_867_cast_fp16 = matmul(transpose_x = var_867_transpose_x_0, transpose_y = var_867_transpose_y_0, x = var_865_cast_fp16, y = v_39_cast_fp16)[name = string("op_867_cast_fp16")];
+            tensor<int32, [4]> var_868 = const()[name = string("op_868"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_89x = const()[name = string("concat_89x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_869_cast_fp16 = transpose(perm = var_868, x = var_867_cast_fp16)[name = string("transpose_89")];
+            tensor<fp16, [1, ?, 512]> x_67_cast_fp16 = reshape(shape = concat_89x, x = var_869_cast_fp16)[name = string("x_67_cast_fp16")];
+            tensor<fp16, [512, 512]> var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80211072)))];
+            tensor<fp16, [512]> var_874_to_fp16 = const()[name = string("op_874_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80735424)))];
+            tensor<fp16, [1, ?, 512]> linear_29_cast_fp16 = linear(bias = var_874_to_fp16, weight = var_873_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")];
+            tensor<int32, [1]> var_881_axes_0 = const()[name = string("op_881_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80736512)))];
+            tensor<fp16, [512]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80737600)))];
+            tensor<fp16, [1, ?, 512]> var_881_cast_fp16 = layer_norm(axes = var_881_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_881_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_890_to_fp16 = const()[name = string("op_890_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80738688)))];
+            tensor<fp16, [2048]> var_891_to_fp16 = const()[name = string("op_891_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82835904)))];
+            tensor<fp16, [1, ?, 2048]> linear_30_cast_fp16 = linear(bias = var_891_to_fp16, weight = var_890_to_fp16, x = var_881_cast_fp16)[name = string("linear_30_cast_fp16")];
+            string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 2048]> x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_896_to_fp16 = const()[name = string("op_896_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82840064)))];
+            tensor<fp16, [512]> var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84937280)))];
+            tensor<fp16, [1, ?, 512]> linear_31_cast_fp16 = linear(bias = var_897_to_fp16, weight = var_896_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")];
+            tensor<int32, [4]> k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 512])];
+            tensor<bool, [4]> k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_18)[name = string("k_cache_17_cast_fp16")];
+            tensor<int32, [4]> v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 512])];
+            tensor<bool, [4]> v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_19)[name = string("v_cache_17_cast_fp16")];
+            tensor<int32, [4]> k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 512])];
+            tensor<bool, [4]> k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")];
+            tensor<int32, [4]> v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 512])];
+            tensor<bool, [4]> v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")];
+            int32 var_920 = const()[name = string("op_920"), val = int32(-1)];
+            tensor<int32, [1]> var_938_axes_0 = const()[name = string("op_938_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84938368)))];
+            tensor<fp16, [512]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84939456)))];
+            fp16 var_926_to_fp16 = const()[name = string("op_926_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 512]> var_938_cast_fp16 = layer_norm(axes = var_938_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_938_cast_fp16")];
+            tensor<fp16, [512, 512]> var_949_to_fp16 = const()[name = string("op_949_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84940544)))];
+            tensor<fp16, [512]> var_950_to_fp16 = const()[name = string("op_950_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85464896)))];
+            tensor<fp16, [1, ?, 512]> linear_32_cast_fp16 = linear(bias = var_950_to_fp16, weight = var_949_to_fp16, x = var_938_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<fp16, [512, 512]> var_953_to_fp16 = const()[name = string("op_953_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85465984)))];
+            tensor<fp16, [1, ?, 512]> linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_953_to_fp16, x = var_938_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [512, 512]> var_957_to_fp16 = const()[name = string("op_957_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85990336)))];
+            tensor<fp16, [512]> var_958_to_fp16 = const()[name = string("op_958_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86514688)))];
+            tensor<fp16, [1, ?, 512]> linear_34_cast_fp16 = linear(bias = var_958_to_fp16, weight = var_957_to_fp16, x = var_938_cast_fp16)[name = string("linear_34_cast_fp16")];
+            tensor<int32, [3]> var_960_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_960_shape_cast_fp16")];
+            int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)];
+            int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)];
+            bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)];
+            string var_960_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_960_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_960_shape_cast_fp16_to_uint16 = cast(dtype = var_960_shape_cast_fp16_to_uint16_dtype_0, x = var_960_shape_cast_fp16)[name = string("cast_70")];
+            uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_960_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")];
+            string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_69")];
+            int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")];
+            tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")];
+            tensor<int32, [1]> concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor<int32, [1]>([4])];
+            int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)];
+            bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")];
+            tensor<int32, [1]> concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
+            bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_18)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_20_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_20 = read_state(input = k_cache1)[name = string("coreml_update_state_20")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_19)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_21_write_state")];
+            tensor<fp16, [6, 1, 448, 512]> coreml_update_state_21 = read_state(input = v_cache1)[name = string("coreml_update_state_21")];
+            int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)];
+            int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(512)];
+            int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)];
+            bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")];
+            tensor<int32, [3]> var_976_begin_0 = const()[name = string("op_976_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_976_end_mask_0 = const()[name = string("op_976_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = concat_98, end_mask = var_976_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_976_cast_fp16")];
+            tensor<int32, [3]> var_979_begin_0 = const()[name = string("op_979_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_979_end_mask_0 = const()[name = string("op_979_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_979_cast_fp16 = slice_by_index(begin = var_979_begin_0, end = concat_98, end_mask = var_979_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_979_cast_fp16")];
+            tensor<int32, [4]> concat_100x = const()[name = string("concat_100x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_989_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_989_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_46_to_fp16 = const()[name = string("const_46_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_35_cast_fp16 = mul(x = var_989_cast_fp16, y = const_46_to_fp16)[name = string("q_35_cast_fp16")];
+            tensor<int32, [4]> concat_101x = const()[name = string("concat_101x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_996_cast_fp16 = reshape(shape = concat_101x, x = var_976_cast_fp16)[name = string("op_996_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_47_to_fp16 = const()[name = string("const_47_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> k_45_cast_fp16 = mul(x = var_996_cast_fp16, y = const_47_to_fp16)[name = string("k_45_cast_fp16")];
+            tensor<int32, [4]> concat_102x = const()[name = string("concat_102x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_1003_cast_fp16 = reshape(shape = concat_102x, x = var_979_cast_fp16)[name = string("op_1003_cast_fp16")];
+            tensor<int32, [4]> var_1004 = const()[name = string("op_1004"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)];
+            bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, ?]> transpose_66 = transpose(perm = transpose_66_perm_0, x = k_45_cast_fp16)[name = string("transpose_86")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_65 = transpose(perm = transpose_65_perm_0, x = q_35_cast_fp16)[name = string("transpose_87")];
+            tensor<fp16, [1, 8, ?, ?]> qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_65, y = transpose_66)[name = string("qk_25_cast_fp16")];
+            int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)];
+            int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)];
+            bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")];
+            tensor<int32, [2]> var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = concat_103, end_mask = var_1007_end_mask_0, x = mask_to_fp16)[name = string("op_1007_cast_fp16")];
+            int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)];
+            int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)];
+            bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")];
+            tensor<int32, [2]> var_1008_begin_0 = const()[name = string("op_1008_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1008_end_mask_0 = const()[name = string("op_1008_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = concat_104, end_mask = var_1008_end_mask_0, x = var_1007_cast_fp16)[name = string("op_1008_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1008_cast_fp16)[name = string("qk_27_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> var_1011_cast_fp16 = softmax(axis = var_920, x = qk_27_cast_fp16)[name = string("op_1011_cast_fp16")];
+            bool var_1013_transpose_x_0 = const()[name = string("op_1013_transpose_x_0"), val = bool(false)];
+            bool var_1013_transpose_y_0 = const()[name = string("op_1013_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, ?, 64]> v_45_cast_fp16 = transpose(perm = var_1004, x = var_1003_cast_fp16)[name = string("transpose_88")];
+            tensor<fp16, [1, 8, ?, 64]> var_1013_cast_fp16 = matmul(transpose_x = var_1013_transpose_x_0, transpose_y = var_1013_transpose_y_0, x = var_1011_cast_fp16, y = v_45_cast_fp16)[name = string("op_1013_cast_fp16")];
+            tensor<int32, [4]> var_1014 = const()[name = string("op_1014"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_105x = const()[name = string("concat_105x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_1015_cast_fp16 = transpose(perm = var_1014, x = var_1013_cast_fp16)[name = string("transpose_85")];
+            tensor<fp16, [1, ?, 512]> x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1015_cast_fp16)[name = string("x_79_cast_fp16")];
+            tensor<fp16, [512, 512]> var_1019_to_fp16 = const()[name = string("op_1019_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86515776)))];
+            tensor<fp16, [512]> var_1020_to_fp16 = const()[name = string("op_1020_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87040128)))];
+            tensor<fp16, [1, ?, 512]> linear_35_cast_fp16 = linear(bias = var_1020_to_fp16, weight = var_1019_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")];
+            tensor<int32, [1]> var_1027_axes_0 = const()[name = string("op_1027_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87041216)))];
+            tensor<fp16, [512]> blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87042304)))];
+            tensor<fp16, [1, ?, 512]> var_1027_cast_fp16 = layer_norm(axes = var_1027_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1027_cast_fp16")];
+            tensor<fp16, [512, 512]> var_1036_to_fp16 = const()[name = string("op_1036_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87043392)))];
+            tensor<fp16, [512]> var_1037_to_fp16 = const()[name = string("op_1037_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87567744)))];
+            tensor<fp16, [1, ?, 512]> linear_36_cast_fp16 = linear(bias = var_1037_to_fp16, weight = var_1036_to_fp16, x = var_1027_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<int32, [3]> concat_106 = const()[name = string("concat_106"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_108 = const()[name = string("concat_108"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_109 = const()[name = string("concat_109"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_110x = const()[name = string("concat_110x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_1057_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1057_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_48_to_fp16 = const()[name = string("const_48_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_39_cast_fp16 = mul(x = var_1057_cast_fp16, y = const_48_to_fp16)[name = string("q_39_cast_fp16")];
+            tensor<int32, [4]> var_1063 = const()[name = string("op_1063"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_1064_cast_fp16 = reshape(shape = var_1063, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1064_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_49_to_fp16 = const()[name = string("const_49_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_49_cast_fp16 = mul(x = var_1064_cast_fp16, y = const_49_to_fp16)[name = string("k_49_cast_fp16")];
+            tensor<int32, [4]> var_1070 = const()[name = string("op_1070"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_1071_cast_fp16 = reshape(shape = var_1070, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1071_cast_fp16")];
+            tensor<int32, [4]> var_1072 = const()[name = string("op_1072"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)];
+            bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_68 = transpose(perm = transpose_68_perm_0, x = k_49_cast_fp16)[name = string("transpose_82")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_67 = transpose(perm = transpose_67_perm_0, x = q_39_cast_fp16)[name = string("transpose_83")];
+            tensor<fp16, [1, 8, ?, 1500]> qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_67, y = transpose_68)[name = string("qk_29_cast_fp16")];
+            tensor<fp16, [1, 8, ?, 1500]> var_1076_cast_fp16 = softmax(axis = var_920, x = qk_29_cast_fp16)[name = string("op_1076_cast_fp16")];
+            bool var_1078_transpose_x_0 = const()[name = string("op_1078_transpose_x_0"), val = bool(false)];
+            bool var_1078_transpose_y_0 = const()[name = string("op_1078_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_49_cast_fp16 = transpose(perm = var_1072, x = var_1071_cast_fp16)[name = string("transpose_84")];
+            tensor<fp16, [1, 8, ?, 64]> var_1078_cast_fp16 = matmul(transpose_x = var_1078_transpose_x_0, transpose_y = var_1078_transpose_y_0, x = var_1076_cast_fp16, y = v_49_cast_fp16)[name = string("op_1078_cast_fp16")];
+            tensor<int32, [4]> var_1079 = const()[name = string("op_1079"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_111x = const()[name = string("concat_111x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_1080_cast_fp16 = transpose(perm = var_1079, x = var_1078_cast_fp16)[name = string("transpose_81")];
+            tensor<fp16, [1, ?, 512]> x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1080_cast_fp16)[name = string("x_85_cast_fp16")];
+            tensor<fp16, [512, 512]> var_1084_to_fp16 = const()[name = string("op_1084_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87568832)))];
+            tensor<fp16, [512]> var_1085_to_fp16 = const()[name = string("op_1085_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88093184)))];
+            tensor<fp16, [1, ?, 512]> linear_37_cast_fp16 = linear(bias = var_1085_to_fp16, weight = var_1084_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")];
+            tensor<int32, [1]> var_1092_axes_0 = const()[name = string("op_1092_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88094272)))];
+            tensor<fp16, [512]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88095360)))];
+            tensor<fp16, [1, ?, 512]> var_1092_cast_fp16 = layer_norm(axes = var_1092_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1092_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88096448)))];
+            tensor<fp16, [2048]> var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90193664)))];
+            tensor<fp16, [1, ?, 2048]> linear_38_cast_fp16 = linear(bias = var_1102_to_fp16, weight = var_1101_to_fp16, x = var_1092_cast_fp16)[name = string("linear_38_cast_fp16")];
+            string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 2048]> x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_1107_to_fp16 = const()[name = string("op_1107_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90197824)))];
+            tensor<fp16, [512]> var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92295040)))];
+            tensor<fp16, [1, ?, 512]> linear_39_cast_fp16 = linear(bias = var_1108_to_fp16, weight = var_1107_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")];
+            tensor<int32, [4]> k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 512])];
+            tensor<bool, [4]> k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_20)[name = string("k_cache_21_cast_fp16")];
+            tensor<int32, [4]> v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 512])];
+            tensor<bool, [4]> v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 512]> v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_21)[name = string("v_cache_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 512])];
+            tensor<bool, [4]> k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")];
+            tensor<int32, [4]> v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 512])];
+            tensor<bool, [4]> v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")];
+            int32 var_1131 = const()[name = string("op_1131"), val = int32(-1)];
+            tensor<int32, [1]> var_1149_axes_0 = const()[name = string("op_1149_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92296128)))];
+            tensor<fp16, [512]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92297216)))];
+            fp16 var_1137_to_fp16 = const()[name = string("op_1137_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 512]> var_1149_cast_fp16 = layer_norm(axes = var_1149_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1149_cast_fp16")];
+            tensor<fp16, [512, 512]> var_1160_to_fp16 = const()[name = string("op_1160_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92298304)))];
+            tensor<fp16, [512]> var_1161_to_fp16 = const()[name = string("op_1161_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92822656)))];
+            tensor<fp16, [1, ?, 512]> linear_40_cast_fp16 = linear(bias = var_1161_to_fp16, weight = var_1160_to_fp16, x = var_1149_cast_fp16)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [512, 512]> var_1164_to_fp16 = const()[name = string("op_1164_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92823744)))];
+            tensor<fp16, [1, ?, 512]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1164_to_fp16, x = var_1149_cast_fp16)[name = string("linear_41_cast_fp16")];
+            tensor<fp16, [512, 512]> var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93348096)))];
+            tensor<fp16, [512]> var_1169_to_fp16 = const()[name = string("op_1169_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93872448)))];
+            tensor<fp16, [1, ?, 512]> linear_42_cast_fp16 = linear(bias = var_1169_to_fp16, weight = var_1168_to_fp16, x = var_1149_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<int32, [3]> var_1171_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1171_shape_cast_fp16")];
+            int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)];
+            int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)];
+            bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)];
+            string var_1171_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1171_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1171_shape_cast_fp16_to_uint16 = cast(dtype = var_1171_shape_cast_fp16_to_uint16_dtype_0, x = var_1171_shape_cast_fp16)[name = string("cast_68")];
+            uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1171_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")];
+            string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_67")];
+            int32 end_step = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step")];
+            tensor<int32, [1]> expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step)[name = string("expand_dims_83")];
+            tensor<int32, [1]> concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor<int32, [1]>([5])];
+            int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
+            bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")];
+            tensor<int32, [1]> concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)];
+            bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_20)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_22_write_state")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [6, 1, 448, 512]> v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_21)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_23_write_state")];
+            int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)];
+            int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(512)];
+            int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)];
+            bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step, concat_120_values2_0))[name = string("concat_120")];
+            tensor<int32, [3]> var_1187_begin_0 = const()[name = string("op_1187_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1187_end_mask_0 = const()[name = string("op_1187_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_1187_cast_fp16 = slice_by_index(begin = var_1187_begin_0, end = concat_120, end_mask = var_1187_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1187_cast_fp16")];
+            tensor<int32, [3]> var_1190_begin_0 = const()[name = string("op_1190_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1190_end_mask_0 = const()[name = string("op_1190_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 512]> var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = concat_120, end_mask = var_1190_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1190_cast_fp16")];
+            tensor<int32, [4]> concat_122x = const()[name = string("concat_122x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_1200_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1200_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_50_to_fp16 = const()[name = string("const_50_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_43_cast_fp16 = mul(x = var_1200_cast_fp16, y = const_50_to_fp16)[name = string("q_43_cast_fp16")];
+            tensor<int32, [4]> concat_123x = const()[name = string("concat_123x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_1207_cast_fp16 = reshape(shape = concat_123x, x = var_1187_cast_fp16)[name = string("op_1207_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> k_55_cast_fp16 = mul(x = var_1207_cast_fp16, y = const_51_to_fp16)[name = string("k_55_cast_fp16")];
+            tensor<int32, [4]> concat_124x = const()[name = string("concat_124x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_1214_cast_fp16 = reshape(shape = concat_124x, x = var_1190_cast_fp16)[name = string("op_1214_cast_fp16")];
+            tensor<int32, [4]> var_1215 = const()[name = string("op_1215"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)];
+            bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, ?]> transpose_70 = transpose(perm = transpose_70_perm_0, x = k_55_cast_fp16)[name = string("transpose_78")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_69 = transpose(perm = transpose_69_perm_0, x = q_43_cast_fp16)[name = string("transpose_79")];
+            tensor<fp16, [1, 8, ?, ?]> qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_69, y = transpose_70)[name = string("qk_31_cast_fp16")];
+            int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)];
+            int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)];
+            bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")];
+            tensor<int32, [2]> var_1218_begin_0 = const()[name = string("op_1218_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1218_end_mask_0 = const()[name = string("op_1218_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = concat_125, end_mask = var_1218_end_mask_0, x = mask_to_fp16)[name = string("op_1218_cast_fp16")];
+            int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)];
+            int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)];
+            bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")];
+            tensor<int32, [2]> var_1219_begin_0 = const()[name = string("op_1219_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1219_end_mask_0 = const()[name = string("op_1219_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1219_cast_fp16 = slice_by_index(begin = var_1219_begin_0, end = concat_126, end_mask = var_1219_end_mask_0, x = var_1218_cast_fp16)[name = string("op_1219_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1219_cast_fp16)[name = string("qk_33_cast_fp16")];
+            tensor<fp16, [1, 8, ?, ?]> var_1222_cast_fp16 = softmax(axis = var_1131, x = qk_33_cast_fp16)[name = string("op_1222_cast_fp16")];
+            bool var_1224_transpose_x_0 = const()[name = string("op_1224_transpose_x_0"), val = bool(false)];
+            bool var_1224_transpose_y_0 = const()[name = string("op_1224_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, ?, 64]> v_55_cast_fp16 = transpose(perm = var_1215, x = var_1214_cast_fp16)[name = string("transpose_80")];
+            tensor<fp16, [1, 8, ?, 64]> var_1224_cast_fp16 = matmul(transpose_x = var_1224_transpose_x_0, transpose_y = var_1224_transpose_y_0, x = var_1222_cast_fp16, y = v_55_cast_fp16)[name = string("op_1224_cast_fp16")];
+            tensor<int32, [4]> var_1225 = const()[name = string("op_1225"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_127x = const()[name = string("concat_127x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_1226_cast_fp16 = transpose(perm = var_1225, x = var_1224_cast_fp16)[name = string("transpose_77")];
+            tensor<fp16, [1, ?, 512]> x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1226_cast_fp16)[name = string("x_97_cast_fp16")];
+            tensor<fp16, [512, 512]> var_1230_to_fp16 = const()[name = string("op_1230_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93873536)))];
+            tensor<fp16, [512]> var_1231_to_fp16 = const()[name = string("op_1231_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94397888)))];
+            tensor<fp16, [1, ?, 512]> linear_43_cast_fp16 = linear(bias = var_1231_to_fp16, weight = var_1230_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")];
+            tensor<int32, [1]> var_1238_axes_0 = const()[name = string("op_1238_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94398976)))];
+            tensor<fp16, [512]> blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94400064)))];
+            tensor<fp16, [1, ?, 512]> var_1238_cast_fp16 = layer_norm(axes = var_1238_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1238_cast_fp16")];
+            tensor<fp16, [512, 512]> var_1247_to_fp16 = const()[name = string("op_1247_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94401152)))];
+            tensor<fp16, [512]> var_1248_to_fp16 = const()[name = string("op_1248_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94925504)))];
+            tensor<fp16, [1, ?, 512]> linear_44_cast_fp16 = linear(bias = var_1248_to_fp16, weight = var_1247_to_fp16, x = var_1238_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [3]> concat_128 = const()[name = string("concat_128"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_129 = const()[name = string("concat_129"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_130 = const()[name = string("concat_130"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_131 = const()[name = string("concat_131"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 512]> v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_132x = const()[name = string("concat_132x"), val = tensor<int32, [4]>([1, -1, 8, 64])];
+            tensor<fp16, [1, ?, 8, 64]> var_1268_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1268_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_52_to_fp16 = const()[name = string("const_52_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 8, 64]> q_cast_fp16 = mul(x = var_1268_cast_fp16, y = const_52_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_1274 = const()[name = string("op_1274"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_1275_cast_fp16 = reshape(shape = var_1274, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1275_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_cast_fp16 = mul(x = var_1275_cast_fp16, y = const_53_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_1281 = const()[name = string("op_1281"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_1282_cast_fp16 = reshape(shape = var_1281, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1282_cast_fp16")];
+            tensor<int32, [4]> var_1283 = const()[name = string("op_1283"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_72 = transpose(perm = transpose_72_perm_0, x = k_cast_fp16)[name = string("transpose_74")];
+            tensor<fp16, [1, 8, ?, 64]> transpose_71 = transpose(perm = transpose_71_perm_0, x = q_cast_fp16)[name = string("transpose_75")];
+            tensor<fp16, [1, 8, ?, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_71, y = transpose_72)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 8, ?, 1500]> var_1287_cast_fp16 = softmax(axis = var_1131, x = qk_cast_fp16)[name = string("op_1287_cast_fp16")];
+            bool var_1289_transpose_x_0 = const()[name = string("op_1289_transpose_x_0"), val = bool(false)];
+            bool var_1289_transpose_y_0 = const()[name = string("op_1289_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_cast_fp16 = transpose(perm = var_1283, x = var_1282_cast_fp16)[name = string("transpose_76")];
+            tensor<fp16, [1, 8, ?, 64]> var_1289_cast_fp16 = matmul(transpose_x = var_1289_transpose_x_0, transpose_y = var_1289_transpose_y_0, x = var_1287_cast_fp16, y = v_cast_fp16)[name = string("op_1289_cast_fp16")];
+            tensor<int32, [4]> var_1290 = const()[name = string("op_1290"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_133x = const()[name = string("concat_133x"), val = tensor<int32, [3]>([1, -1, 512])];
+            tensor<fp16, [1, ?, 8, 64]> var_1291_cast_fp16 = transpose(perm = var_1290, x = var_1289_cast_fp16)[name = string("transpose_73")];
+            tensor<fp16, [1, ?, 512]> x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1291_cast_fp16)[name = string("x_103_cast_fp16")];
+            tensor<fp16, [512, 512]> var_1295_to_fp16 = const()[name = string("op_1295_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94926592)))];
+            tensor<fp16, [512]> var_1296_to_fp16 = const()[name = string("op_1296_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95450944)))];
+            tensor<fp16, [1, ?, 512]> linear_45_cast_fp16 = linear(bias = var_1296_to_fp16, weight = var_1295_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")];
+            tensor<int32, [1]> var_1303_axes_0 = const()[name = string("op_1303_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95452032)))];
+            tensor<fp16, [512]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95453120)))];
+            tensor<fp16, [1, ?, 512]> var_1303_cast_fp16 = layer_norm(axes = var_1303_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1303_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_1312_to_fp16 = const()[name = string("op_1312_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95454208)))];
+            tensor<fp16, [2048]> var_1313_to_fp16 = const()[name = string("op_1313_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97551424)))];
+            tensor<fp16, [1, ?, 2048]> linear_46_cast_fp16 = linear(bias = var_1313_to_fp16, weight = var_1312_to_fp16, x = var_1303_cast_fp16)[name = string("linear_46_cast_fp16")];
+            string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 2048]> x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_1318_to_fp16 = const()[name = string("op_1318_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97555584)))];
+            tensor<fp16, [512]> var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99652800)))];
+            tensor<fp16, [1, ?, 512]> linear_47_cast_fp16 = linear(bias = var_1319_to_fp16, weight = var_1318_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")];
+            tensor<fp16, [1, ?, 512]> x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")];
+            tensor<int32, [1]> var_1332_axes_0 = const()[name = string("op_1332_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99653888)))];
+            tensor<fp16, [512]> ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99654976)))];
+            fp16 var_1323_to_fp16 = const()[name = string("op_1323_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 512]> var_1332_cast_fp16 = layer_norm(axes = var_1332_axes_0, beta = ln_bias_to_fp16, epsilon = var_1323_to_fp16, gamma = ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1332_cast_fp16")];
+            tensor<fp16, [51865]> var_1342_bias_0_to_fp16 = const()[name = string("op_1342_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99656064)))];
+            tensor<fp16, [1, ?, 51865]> logits = linear(bias = var_1342_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_1332_cast_fp16)[name = string("op_1342_cast_fp16")];
+        } -> (logits);
+}
\ No newline at end of file
diff --git a/base/decoder_second.mlmodelc/weights/weight.bin b/base/decoder_second.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3153d1bbcd435e50a4cf80c99254e8a03ffbdc2f
--- /dev/null
+++ b/base/decoder_second.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94338bcd9475d6d8848699ee40dd6fac40d1e597c1e28d124454a7bf37bff672
+size 99759858
diff --git a/base/encoder.mlmodelc/analytics/coremldata.bin b/base/encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b2ae2d1f6d9f1fe93e00469b454364bdbe8e910c
--- /dev/null
+++ b/base/encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:405dc318443c493222a32916d66c5d908d7cc1d250f73e9a192d5b734a8494ed
+size 243
diff --git a/base/encoder.mlmodelc/coremldata.bin b/base/encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9ca7832b786c7f159522ab760ae244ed35db998f
--- /dev/null
+++ b/base/encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b04d5884b5a2d983f52a0e557aff2e7d3dff78b2a9f9d496a5280546bacfaff
+size 318
diff --git a/base/encoder.mlmodelc/metadata.json b/base/encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ab391b2709a9fb2b09b1f7b6d89ca78857d25db
--- /dev/null
+++ b/base/encoder.mlmodelc/metadata.json
@@ -0,0 +1,69 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 512]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.mul" : 12,
+      "Ios18.softmax" : 6,
+      "Ios18.linear" : 36,
+      "Ios18.gelu" : 8,
+      "Ios18.layerNorm" : 13,
+      "Ios18.transpose" : 25,
+      "Ios18.matmul" : 12,
+      "Ios18.conv" : 2,
+      "Ios18.add" : 13,
+      "Ios18.reshape" : 24
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "encoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/base/encoder.mlmodelc/model.mil b/base/encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..4dc0b2790a85a39113a2dfcbe5820fc579e08264
--- /dev/null
+++ b/base/encoder.mlmodelc/model.mil
@@ -0,0 +1,384 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 80, 3000]> logmel_data) {
+            string var_32_pad_type_0 = const()[name = string("op_32_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_32_pad_0 = const()[name = string("op_32_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_32_strides_0 = const()[name = string("op_32_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_32_dilations_0 = const()[name = string("op_32_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_32_groups_0 = const()[name = string("op_32_groups_0"), val = int32(1)];
+            tensor<fp16, [512, 80, 3]> weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor<fp16, [512, 80, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [512]> bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245888)))];
+            tensor<fp16, [1, 512, 3000]> var_32_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_32_dilations_0, groups = var_32_groups_0, pad = var_32_pad_0, pad_type = var_32_pad_type_0, strides = var_32_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_32_cast_fp16")];
+            string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 512, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_32_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_50_pad_type_0 = const()[name = string("op_50_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_50_pad_0 = const()[name = string("op_50_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_50_strides_0 = const()[name = string("op_50_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_50_dilations_0 = const()[name = string("op_50_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_50_groups_0 = const()[name = string("op_50_groups_0"), val = int32(1)];
+            tensor<fp16, [512, 512, 3]> weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor<fp16, [512, 512, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246976)))];
+            tensor<fp16, [512]> bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1819904)))];
+            tensor<fp16, [1, 512, 1500]> var_50_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_50_dilations_0, groups = var_50_groups_0, pad = var_50_pad_0, pad_type = var_50_pad_type_0, strides = var_50_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_50_cast_fp16")];
+            string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 512, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_50_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<int32, [3]> var_56 = const()[name = string("op_56"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [1500, 512]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [1500, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1820992)))];
+            tensor<fp16, [1, 1500, 512]> x_5_cast_fp16 = transpose(perm = var_56, x = x_3_cast_fp16)[name = string("transpose_60")];
+            tensor<fp16, [1, 1500, 512]> var_59_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_59_cast_fp16")];
+            int32 var_72 = const()[name = string("op_72"), val = int32(-1)];
+            tensor<int32, [1]> var_88_axes_0 = const()[name = string("op_88_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3357056)))];
+            tensor<fp16, [512]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3358144)))];
+            fp16 var_78_to_fp16 = const()[name = string("op_78_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 512]> var_88_cast_fp16 = layer_norm(axes = var_88_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_78_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_59_cast_fp16)[name = string("op_88_cast_fp16")];
+            tensor<fp16, [512, 512]> var_99_to_fp16 = const()[name = string("op_99_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3359232)))];
+            tensor<fp16, [512]> var_100_to_fp16 = const()[name = string("op_100_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3883584)))];
+            tensor<fp16, [1, 1500, 512]> linear_0_cast_fp16 = linear(bias = var_100_to_fp16, weight = var_99_to_fp16, x = var_88_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [512, 512]> var_103_to_fp16 = const()[name = string("op_103_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3884672)))];
+            tensor<fp16, [512]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4409024)))];
+            tensor<fp16, [1, 1500, 512]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_103_to_fp16, x = var_88_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [512, 512]> var_107_to_fp16 = const()[name = string("op_107_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4410112)))];
+            tensor<fp16, [512]> var_108_to_fp16 = const()[name = string("op_108_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4934464)))];
+            tensor<fp16, [1, 1500, 512]> linear_2_cast_fp16 = linear(bias = var_108_to_fp16, weight = var_107_to_fp16, x = var_88_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_116 = const()[name = string("op_116"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_117_cast_fp16 = reshape(shape = var_116, x = linear_0_cast_fp16)[name = string("op_117_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> q_3_cast_fp16 = mul(x = var_117_cast_fp16, y = const_42_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> var_123 = const()[name = string("op_123"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_124_cast_fp16 = reshape(shape = var_123, x = linear_1_cast_fp16)[name = string("op_124_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_3_cast_fp16 = mul(x = var_124_cast_fp16, y = const_43_to_fp16)[name = string("k_3_cast_fp16")];
+            tensor<int32, [4]> var_130 = const()[name = string("op_130"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_131_cast_fp16 = reshape(shape = var_130, x = linear_2_cast_fp16)[name = string("op_131_cast_fp16")];
+            tensor<int32, [4]> var_132 = const()[name = string("op_132"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_25 = transpose(perm = transpose_25_perm_0, x = k_3_cast_fp16)[name = string("transpose_57")];
+            tensor<fp16, [1, 8, 1500, 64]> transpose_24 = transpose(perm = transpose_24_perm_0, x = q_3_cast_fp16)[name = string("transpose_58")];
+            tensor<fp16, [1, 8, 1500, 1500]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_24, y = transpose_25)[name = string("qk_1_cast_fp16")];
+            tensor<fp16, [1, 8, 1500, 1500]> var_136_cast_fp16 = softmax(axis = var_72, x = qk_1_cast_fp16)[name = string("op_136_cast_fp16")];
+            bool var_138_transpose_x_0 = const()[name = string("op_138_transpose_x_0"), val = bool(false)];
+            bool var_138_transpose_y_0 = const()[name = string("op_138_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_3_cast_fp16 = transpose(perm = var_132, x = var_131_cast_fp16)[name = string("transpose_59")];
+            tensor<fp16, [1, 8, 1500, 64]> var_138_cast_fp16 = matmul(transpose_x = var_138_transpose_x_0, transpose_y = var_138_transpose_y_0, x = var_136_cast_fp16, y = v_3_cast_fp16)[name = string("op_138_cast_fp16")];
+            tensor<int32, [4]> var_139 = const()[name = string("op_139"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [3]>([1, 1500, 512])];
+            tensor<fp16, [1, 1500, 8, 64]> var_140_cast_fp16 = transpose(perm = var_139, x = var_138_cast_fp16)[name = string("transpose_56")];
+            tensor<fp16, [1, 1500, 512]> x_11_cast_fp16 = reshape(shape = concat_0, x = var_140_cast_fp16)[name = string("x_11_cast_fp16")];
+            tensor<fp16, [512, 512]> var_144_to_fp16 = const()[name = string("op_144_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4935552)))];
+            tensor<fp16, [512]> var_145_to_fp16 = const()[name = string("op_145_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5459904)))];
+            tensor<fp16, [1, 1500, 512]> linear_3_cast_fp16 = linear(bias = var_145_to_fp16, weight = var_144_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_13_cast_fp16 = add(x = var_59_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<int32, [1]> var_152_axes_0 = const()[name = string("op_152_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5460992)))];
+            tensor<fp16, [512]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5462080)))];
+            tensor<fp16, [1, 1500, 512]> var_152_cast_fp16 = layer_norm(axes = var_152_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_78_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_152_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_161_to_fp16 = const()[name = string("op_161_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5463168)))];
+            tensor<fp16, [2048]> var_162_to_fp16 = const()[name = string("op_162_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7560384)))];
+            tensor<fp16, [1, 1500, 2048]> linear_4_cast_fp16 = linear(bias = var_162_to_fp16, weight = var_161_to_fp16, x = var_152_cast_fp16)[name = string("linear_4_cast_fp16")];
+            string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 2048]> x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_167_to_fp16 = const()[name = string("op_167_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7564544)))];
+            tensor<fp16, [512]> var_168_to_fp16 = const()[name = string("op_168_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9661760)))];
+            tensor<fp16, [1, 1500, 512]> linear_5_cast_fp16 = linear(bias = var_168_to_fp16, weight = var_167_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")];
+            int32 var_178 = const()[name = string("op_178"), val = int32(-1)];
+            tensor<int32, [1]> var_194_axes_0 = const()[name = string("op_194_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9662848)))];
+            tensor<fp16, [512]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9663936)))];
+            fp16 var_184_to_fp16 = const()[name = string("op_184_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 512]> var_194_cast_fp16 = layer_norm(axes = var_194_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_184_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_194_cast_fp16")];
+            tensor<fp16, [512, 512]> var_205_to_fp16 = const()[name = string("op_205_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9665024)))];
+            tensor<fp16, [512]> var_206_to_fp16 = const()[name = string("op_206_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10189376)))];
+            tensor<fp16, [1, 1500, 512]> linear_6_cast_fp16 = linear(bias = var_206_to_fp16, weight = var_205_to_fp16, x = var_194_cast_fp16)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [512, 512]> var_209_to_fp16 = const()[name = string("op_209_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10190464)))];
+            tensor<fp16, [1, 1500, 512]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_209_to_fp16, x = var_194_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [512, 512]> var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10714816)))];
+            tensor<fp16, [512]> var_214_to_fp16 = const()[name = string("op_214_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11239168)))];
+            tensor<fp16, [1, 1500, 512]> linear_8_cast_fp16 = linear(bias = var_214_to_fp16, weight = var_213_to_fp16, x = var_194_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_222 = const()[name = string("op_222"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_223_cast_fp16 = reshape(shape = var_222, x = linear_6_cast_fp16)[name = string("op_223_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_44_to_fp16 = const()[name = string("const_44_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> q_7_cast_fp16 = mul(x = var_223_cast_fp16, y = const_44_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_229 = const()[name = string("op_229"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_230_cast_fp16 = reshape(shape = var_229, x = linear_7_cast_fp16)[name = string("op_230_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_45_to_fp16 = const()[name = string("const_45_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_7_cast_fp16 = mul(x = var_230_cast_fp16, y = const_45_to_fp16)[name = string("k_7_cast_fp16")];
+            tensor<int32, [4]> var_236 = const()[name = string("op_236"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_237_cast_fp16 = reshape(shape = var_236, x = linear_8_cast_fp16)[name = string("op_237_cast_fp16")];
+            tensor<int32, [4]> var_238 = const()[name = string("op_238"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)];
+            bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_27 = transpose(perm = transpose_27_perm_0, x = k_7_cast_fp16)[name = string("transpose_53")];
+            tensor<fp16, [1, 8, 1500, 64]> transpose_26 = transpose(perm = transpose_26_perm_0, x = q_7_cast_fp16)[name = string("transpose_54")];
+            tensor<fp16, [1, 8, 1500, 1500]> qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_26, y = transpose_27)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 8, 1500, 1500]> var_242_cast_fp16 = softmax(axis = var_178, x = qk_3_cast_fp16)[name = string("op_242_cast_fp16")];
+            bool var_244_transpose_x_0 = const()[name = string("op_244_transpose_x_0"), val = bool(false)];
+            bool var_244_transpose_y_0 = const()[name = string("op_244_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_7_cast_fp16 = transpose(perm = var_238, x = var_237_cast_fp16)[name = string("transpose_55")];
+            tensor<fp16, [1, 8, 1500, 64]> var_244_cast_fp16 = matmul(transpose_x = var_244_transpose_x_0, transpose_y = var_244_transpose_y_0, x = var_242_cast_fp16, y = v_7_cast_fp16)[name = string("op_244_cast_fp16")];
+            tensor<int32, [4]> var_245 = const()[name = string("op_245"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [3]>([1, 1500, 512])];
+            tensor<fp16, [1, 1500, 8, 64]> var_246_cast_fp16 = transpose(perm = var_245, x = var_244_cast_fp16)[name = string("transpose_52")];
+            tensor<fp16, [1, 1500, 512]> x_23_cast_fp16 = reshape(shape = concat_1, x = var_246_cast_fp16)[name = string("x_23_cast_fp16")];
+            tensor<fp16, [512, 512]> var_250_to_fp16 = const()[name = string("op_250_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11240256)))];
+            tensor<fp16, [512]> var_251_to_fp16 = const()[name = string("op_251_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11764608)))];
+            tensor<fp16, [1, 1500, 512]> linear_9_cast_fp16 = linear(bias = var_251_to_fp16, weight = var_250_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<int32, [1]> var_258_axes_0 = const()[name = string("op_258_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11765696)))];
+            tensor<fp16, [512]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11766784)))];
+            tensor<fp16, [1, 1500, 512]> var_258_cast_fp16 = layer_norm(axes = var_258_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_184_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_258_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_267_to_fp16 = const()[name = string("op_267_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11767872)))];
+            tensor<fp16, [2048]> var_268_to_fp16 = const()[name = string("op_268_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13865088)))];
+            tensor<fp16, [1, 1500, 2048]> linear_10_cast_fp16 = linear(bias = var_268_to_fp16, weight = var_267_to_fp16, x = var_258_cast_fp16)[name = string("linear_10_cast_fp16")];
+            string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 2048]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_273_to_fp16 = const()[name = string("op_273_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13869248)))];
+            tensor<fp16, [512]> var_274_to_fp16 = const()[name = string("op_274_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15966464)))];
+            tensor<fp16, [1, 1500, 512]> linear_11_cast_fp16 = linear(bias = var_274_to_fp16, weight = var_273_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")];
+            int32 var_284 = const()[name = string("op_284"), val = int32(-1)];
+            tensor<int32, [1]> var_300_axes_0 = const()[name = string("op_300_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15967552)))];
+            tensor<fp16, [512]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15968640)))];
+            fp16 var_290_to_fp16 = const()[name = string("op_290_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 512]> var_300_cast_fp16 = layer_norm(axes = var_300_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_290_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_300_cast_fp16")];
+            tensor<fp16, [512, 512]> var_311_to_fp16 = const()[name = string("op_311_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15969728)))];
+            tensor<fp16, [512]> var_312_to_fp16 = const()[name = string("op_312_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16494080)))];
+            tensor<fp16, [1, 1500, 512]> linear_12_cast_fp16 = linear(bias = var_312_to_fp16, weight = var_311_to_fp16, x = var_300_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [512, 512]> var_315_to_fp16 = const()[name = string("op_315_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16495168)))];
+            tensor<fp16, [1, 1500, 512]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_315_to_fp16, x = var_300_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [512, 512]> var_319_to_fp16 = const()[name = string("op_319_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17019520)))];
+            tensor<fp16, [512]> var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17543872)))];
+            tensor<fp16, [1, 1500, 512]> linear_14_cast_fp16 = linear(bias = var_320_to_fp16, weight = var_319_to_fp16, x = var_300_cast_fp16)[name = string("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_328 = const()[name = string("op_328"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_329_cast_fp16 = reshape(shape = var_328, x = linear_12_cast_fp16)[name = string("op_329_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_46_to_fp16 = const()[name = string("const_46_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> q_11_cast_fp16 = mul(x = var_329_cast_fp16, y = const_46_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> var_335 = const()[name = string("op_335"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_336_cast_fp16 = reshape(shape = var_335, x = linear_13_cast_fp16)[name = string("op_336_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_47_to_fp16 = const()[name = string("const_47_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_11_cast_fp16 = mul(x = var_336_cast_fp16, y = const_47_to_fp16)[name = string("k_11_cast_fp16")];
+            tensor<int32, [4]> var_342 = const()[name = string("op_342"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_343_cast_fp16 = reshape(shape = var_342, x = linear_14_cast_fp16)[name = string("op_343_cast_fp16")];
+            tensor<int32, [4]> var_344 = const()[name = string("op_344"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_29 = transpose(perm = transpose_29_perm_0, x = k_11_cast_fp16)[name = string("transpose_49")];
+            tensor<fp16, [1, 8, 1500, 64]> transpose_28 = transpose(perm = transpose_28_perm_0, x = q_11_cast_fp16)[name = string("transpose_50")];
+            tensor<fp16, [1, 8, 1500, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_28, y = transpose_29)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 8, 1500, 1500]> var_348_cast_fp16 = softmax(axis = var_284, x = qk_5_cast_fp16)[name = string("op_348_cast_fp16")];
+            bool var_350_transpose_x_0 = const()[name = string("op_350_transpose_x_0"), val = bool(false)];
+            bool var_350_transpose_y_0 = const()[name = string("op_350_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_11_cast_fp16 = transpose(perm = var_344, x = var_343_cast_fp16)[name = string("transpose_51")];
+            tensor<fp16, [1, 8, 1500, 64]> var_350_cast_fp16 = matmul(transpose_x = var_350_transpose_x_0, transpose_y = var_350_transpose_y_0, x = var_348_cast_fp16, y = v_11_cast_fp16)[name = string("op_350_cast_fp16")];
+            tensor<int32, [4]> var_351 = const()[name = string("op_351"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [3]>([1, 1500, 512])];
+            tensor<fp16, [1, 1500, 8, 64]> var_352_cast_fp16 = transpose(perm = var_351, x = var_350_cast_fp16)[name = string("transpose_48")];
+            tensor<fp16, [1, 1500, 512]> x_35_cast_fp16 = reshape(shape = concat_2, x = var_352_cast_fp16)[name = string("x_35_cast_fp16")];
+            tensor<fp16, [512, 512]> var_356_to_fp16 = const()[name = string("op_356_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17544960)))];
+            tensor<fp16, [512]> var_357_to_fp16 = const()[name = string("op_357_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18069312)))];
+            tensor<fp16, [1, 1500, 512]> linear_15_cast_fp16 = linear(bias = var_357_to_fp16, weight = var_356_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<int32, [1]> var_364_axes_0 = const()[name = string("op_364_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18070400)))];
+            tensor<fp16, [512]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18071488)))];
+            tensor<fp16, [1, 1500, 512]> var_364_cast_fp16 = layer_norm(axes = var_364_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_290_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_364_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_373_to_fp16 = const()[name = string("op_373_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18072576)))];
+            tensor<fp16, [2048]> var_374_to_fp16 = const()[name = string("op_374_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20169792)))];
+            tensor<fp16, [1, 1500, 2048]> linear_16_cast_fp16 = linear(bias = var_374_to_fp16, weight = var_373_to_fp16, x = var_364_cast_fp16)[name = string("linear_16_cast_fp16")];
+            string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 2048]> x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_379_to_fp16 = const()[name = string("op_379_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20173952)))];
+            tensor<fp16, [512]> var_380_to_fp16 = const()[name = string("op_380_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22271168)))];
+            tensor<fp16, [1, 1500, 512]> linear_17_cast_fp16 = linear(bias = var_380_to_fp16, weight = var_379_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")];
+            int32 var_390 = const()[name = string("op_390"), val = int32(-1)];
+            tensor<int32, [1]> var_406_axes_0 = const()[name = string("op_406_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22272256)))];
+            tensor<fp16, [512]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22273344)))];
+            fp16 var_396_to_fp16 = const()[name = string("op_396_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 512]> var_406_cast_fp16 = layer_norm(axes = var_406_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_396_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_406_cast_fp16")];
+            tensor<fp16, [512, 512]> var_417_to_fp16 = const()[name = string("op_417_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22274432)))];
+            tensor<fp16, [512]> var_418_to_fp16 = const()[name = string("op_418_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22798784)))];
+            tensor<fp16, [1, 1500, 512]> linear_18_cast_fp16 = linear(bias = var_418_to_fp16, weight = var_417_to_fp16, x = var_406_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [512, 512]> var_421_to_fp16 = const()[name = string("op_421_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22799872)))];
+            tensor<fp16, [1, 1500, 512]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_421_to_fp16, x = var_406_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [512, 512]> var_425_to_fp16 = const()[name = string("op_425_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23324224)))];
+            tensor<fp16, [512]> var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23848576)))];
+            tensor<fp16, [1, 1500, 512]> linear_20_cast_fp16 = linear(bias = var_426_to_fp16, weight = var_425_to_fp16, x = var_406_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_434 = const()[name = string("op_434"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_435_cast_fp16 = reshape(shape = var_434, x = linear_18_cast_fp16)[name = string("op_435_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_48_to_fp16 = const()[name = string("const_48_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> q_15_cast_fp16 = mul(x = var_435_cast_fp16, y = const_48_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_441 = const()[name = string("op_441"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_442_cast_fp16 = reshape(shape = var_441, x = linear_19_cast_fp16)[name = string("op_442_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_49_to_fp16 = const()[name = string("const_49_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_15_cast_fp16 = mul(x = var_442_cast_fp16, y = const_49_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> var_448 = const()[name = string("op_448"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_449_cast_fp16 = reshape(shape = var_448, x = linear_20_cast_fp16)[name = string("op_449_cast_fp16")];
+            tensor<int32, [4]> var_450 = const()[name = string("op_450"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_30_perm_0 = const()[name = string("transpose_30_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_31_perm_0 = const()[name = string("transpose_31_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_31 = transpose(perm = transpose_31_perm_0, x = k_15_cast_fp16)[name = string("transpose_45")];
+            tensor<fp16, [1, 8, 1500, 64]> transpose_30 = transpose(perm = transpose_30_perm_0, x = q_15_cast_fp16)[name = string("transpose_46")];
+            tensor<fp16, [1, 8, 1500, 1500]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_30, y = transpose_31)[name = string("qk_7_cast_fp16")];
+            tensor<fp16, [1, 8, 1500, 1500]> var_454_cast_fp16 = softmax(axis = var_390, x = qk_7_cast_fp16)[name = string("op_454_cast_fp16")];
+            bool var_456_transpose_x_0 = const()[name = string("op_456_transpose_x_0"), val = bool(false)];
+            bool var_456_transpose_y_0 = const()[name = string("op_456_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_15_cast_fp16 = transpose(perm = var_450, x = var_449_cast_fp16)[name = string("transpose_47")];
+            tensor<fp16, [1, 8, 1500, 64]> var_456_cast_fp16 = matmul(transpose_x = var_456_transpose_x_0, transpose_y = var_456_transpose_y_0, x = var_454_cast_fp16, y = v_15_cast_fp16)[name = string("op_456_cast_fp16")];
+            tensor<int32, [4]> var_457 = const()[name = string("op_457"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [3]>([1, 1500, 512])];
+            tensor<fp16, [1, 1500, 8, 64]> var_458_cast_fp16 = transpose(perm = var_457, x = var_456_cast_fp16)[name = string("transpose_44")];
+            tensor<fp16, [1, 1500, 512]> x_47_cast_fp16 = reshape(shape = concat_3, x = var_458_cast_fp16)[name = string("x_47_cast_fp16")];
+            tensor<fp16, [512, 512]> var_462_to_fp16 = const()[name = string("op_462_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23849664)))];
+            tensor<fp16, [512]> var_463_to_fp16 = const()[name = string("op_463_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24374016)))];
+            tensor<fp16, [1, 1500, 512]> linear_21_cast_fp16 = linear(bias = var_463_to_fp16, weight = var_462_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<int32, [1]> var_470_axes_0 = const()[name = string("op_470_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24375104)))];
+            tensor<fp16, [512]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24376192)))];
+            tensor<fp16, [1, 1500, 512]> var_470_cast_fp16 = layer_norm(axes = var_470_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_396_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_470_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_479_to_fp16 = const()[name = string("op_479_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24377280)))];
+            tensor<fp16, [2048]> var_480_to_fp16 = const()[name = string("op_480_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26474496)))];
+            tensor<fp16, [1, 1500, 2048]> linear_22_cast_fp16 = linear(bias = var_480_to_fp16, weight = var_479_to_fp16, x = var_470_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 2048]> x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_485_to_fp16 = const()[name = string("op_485_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26478656)))];
+            tensor<fp16, [512]> var_486_to_fp16 = const()[name = string("op_486_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28575872)))];
+            tensor<fp16, [1, 1500, 512]> linear_23_cast_fp16 = linear(bias = var_486_to_fp16, weight = var_485_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")];
+            int32 var_496 = const()[name = string("op_496"), val = int32(-1)];
+            tensor<int32, [1]> var_512_axes_0 = const()[name = string("op_512_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28576960)))];
+            tensor<fp16, [512]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28578048)))];
+            fp16 var_502_to_fp16 = const()[name = string("op_502_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 512]> var_512_cast_fp16 = layer_norm(axes = var_512_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_502_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_512_cast_fp16")];
+            tensor<fp16, [512, 512]> var_523_to_fp16 = const()[name = string("op_523_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28579136)))];
+            tensor<fp16, [512]> var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29103488)))];
+            tensor<fp16, [1, 1500, 512]> linear_24_cast_fp16 = linear(bias = var_524_to_fp16, weight = var_523_to_fp16, x = var_512_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [512, 512]> var_527_to_fp16 = const()[name = string("op_527_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29104576)))];
+            tensor<fp16, [1, 1500, 512]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_527_to_fp16, x = var_512_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [512, 512]> var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29628928)))];
+            tensor<fp16, [512]> var_532_to_fp16 = const()[name = string("op_532_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30153280)))];
+            tensor<fp16, [1, 1500, 512]> linear_26_cast_fp16 = linear(bias = var_532_to_fp16, weight = var_531_to_fp16, x = var_512_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [4]> var_540 = const()[name = string("op_540"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_541_cast_fp16 = reshape(shape = var_540, x = linear_24_cast_fp16)[name = string("op_541_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_50_to_fp16 = const()[name = string("const_50_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> q_19_cast_fp16 = mul(x = var_541_cast_fp16, y = const_50_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> var_547 = const()[name = string("op_547"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_548_cast_fp16 = reshape(shape = var_547, x = linear_25_cast_fp16)[name = string("op_548_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_19_cast_fp16 = mul(x = var_548_cast_fp16, y = const_51_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_554 = const()[name = string("op_554"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_555_cast_fp16 = reshape(shape = var_554, x = linear_26_cast_fp16)[name = string("op_555_cast_fp16")];
+            tensor<int32, [4]> var_556 = const()[name = string("op_556"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)];
+            bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_32_perm_0 = const()[name = string("transpose_32_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_33 = transpose(perm = transpose_33_perm_0, x = k_19_cast_fp16)[name = string("transpose_41")];
+            tensor<fp16, [1, 8, 1500, 64]> transpose_32 = transpose(perm = transpose_32_perm_0, x = q_19_cast_fp16)[name = string("transpose_42")];
+            tensor<fp16, [1, 8, 1500, 1500]> qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_32, y = transpose_33)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 8, 1500, 1500]> var_560_cast_fp16 = softmax(axis = var_496, x = qk_9_cast_fp16)[name = string("op_560_cast_fp16")];
+            bool var_562_transpose_x_0 = const()[name = string("op_562_transpose_x_0"), val = bool(false)];
+            bool var_562_transpose_y_0 = const()[name = string("op_562_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_556, x = var_555_cast_fp16)[name = string("transpose_43")];
+            tensor<fp16, [1, 8, 1500, 64]> var_562_cast_fp16 = matmul(transpose_x = var_562_transpose_x_0, transpose_y = var_562_transpose_y_0, x = var_560_cast_fp16, y = v_19_cast_fp16)[name = string("op_562_cast_fp16")];
+            tensor<int32, [4]> var_563 = const()[name = string("op_563"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_4 = const()[name = string("concat_4"), val = tensor<int32, [3]>([1, 1500, 512])];
+            tensor<fp16, [1, 1500, 8, 64]> var_564_cast_fp16 = transpose(perm = var_563, x = var_562_cast_fp16)[name = string("transpose_40")];
+            tensor<fp16, [1, 1500, 512]> x_59_cast_fp16 = reshape(shape = concat_4, x = var_564_cast_fp16)[name = string("x_59_cast_fp16")];
+            tensor<fp16, [512, 512]> var_568_to_fp16 = const()[name = string("op_568_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30154368)))];
+            tensor<fp16, [512]> var_569_to_fp16 = const()[name = string("op_569_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30678720)))];
+            tensor<fp16, [1, 1500, 512]> linear_27_cast_fp16 = linear(bias = var_569_to_fp16, weight = var_568_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<int32, [1]> var_576_axes_0 = const()[name = string("op_576_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30679808)))];
+            tensor<fp16, [512]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30680896)))];
+            tensor<fp16, [1, 1500, 512]> var_576_cast_fp16 = layer_norm(axes = var_576_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_502_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_576_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_585_to_fp16 = const()[name = string("op_585_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30681984)))];
+            tensor<fp16, [2048]> var_586_to_fp16 = const()[name = string("op_586_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32779200)))];
+            tensor<fp16, [1, 1500, 2048]> linear_28_cast_fp16 = linear(bias = var_586_to_fp16, weight = var_585_to_fp16, x = var_576_cast_fp16)[name = string("linear_28_cast_fp16")];
+            string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 2048]> x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_591_to_fp16 = const()[name = string("op_591_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32783360)))];
+            tensor<fp16, [512]> var_592_to_fp16 = const()[name = string("op_592_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34880576)))];
+            tensor<fp16, [1, 1500, 512]> linear_29_cast_fp16 = linear(bias = var_592_to_fp16, weight = var_591_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")];
+            int32 var_602 = const()[name = string("op_602"), val = int32(-1)];
+            tensor<int32, [1]> var_618_axes_0 = const()[name = string("op_618_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34881664)))];
+            tensor<fp16, [512]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34882752)))];
+            fp16 var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 512]> var_618_cast_fp16 = layer_norm(axes = var_618_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_618_cast_fp16")];
+            tensor<fp16, [512, 512]> var_629_to_fp16 = const()[name = string("op_629_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34883840)))];
+            tensor<fp16, [512]> var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35408192)))];
+            tensor<fp16, [1, 1500, 512]> linear_30_cast_fp16 = linear(bias = var_630_to_fp16, weight = var_629_to_fp16, x = var_618_cast_fp16)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [512, 512]> var_633_to_fp16 = const()[name = string("op_633_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35409280)))];
+            tensor<fp16, [1, 1500, 512]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_633_to_fp16, x = var_618_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [512, 512]> var_637_to_fp16 = const()[name = string("op_637_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35933632)))];
+            tensor<fp16, [512]> var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36457984)))];
+            tensor<fp16, [1, 1500, 512]> linear_32_cast_fp16 = linear(bias = var_638_to_fp16, weight = var_637_to_fp16, x = var_618_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<int32, [4]> var_646 = const()[name = string("op_646"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_647_cast_fp16 = reshape(shape = var_646, x = linear_30_cast_fp16)[name = string("op_647_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_52_to_fp16 = const()[name = string("const_52_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> q_cast_fp16 = mul(x = var_647_cast_fp16, y = const_52_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_653 = const()[name = string("op_653"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_654_cast_fp16 = reshape(shape = var_653, x = linear_31_cast_fp16)[name = string("op_654_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 8, 64]> k_cast_fp16 = mul(x = var_654_cast_fp16, y = const_53_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_660 = const()[name = string("op_660"), val = tensor<int32, [4]>([1, 1500, 8, -1])];
+            tensor<fp16, [1, 1500, 8, 64]> var_661_cast_fp16 = reshape(shape = var_660, x = linear_32_cast_fp16)[name = string("op_661_cast_fp16")];
+            tensor<int32, [4]> var_662 = const()[name = string("op_662"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 8, 64, 1500]> transpose_35 = transpose(perm = transpose_35_perm_0, x = k_cast_fp16)[name = string("transpose_37")];
+            tensor<fp16, [1, 8, 1500, 64]> transpose_34 = transpose(perm = transpose_34_perm_0, x = q_cast_fp16)[name = string("transpose_38")];
+            tensor<fp16, [1, 8, 1500, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_34, y = transpose_35)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 8, 1500, 1500]> var_666_cast_fp16 = softmax(axis = var_602, x = qk_cast_fp16)[name = string("op_666_cast_fp16")];
+            bool var_668_transpose_x_0 = const()[name = string("op_668_transpose_x_0"), val = bool(false)];
+            bool var_668_transpose_y_0 = const()[name = string("op_668_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 8, 1500, 64]> v_cast_fp16 = transpose(perm = var_662, x = var_661_cast_fp16)[name = string("transpose_39")];
+            tensor<fp16, [1, 8, 1500, 64]> var_668_cast_fp16 = matmul(transpose_x = var_668_transpose_x_0, transpose_y = var_668_transpose_y_0, x = var_666_cast_fp16, y = v_cast_fp16)[name = string("op_668_cast_fp16")];
+            tensor<int32, [4]> var_669 = const()[name = string("op_669"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [3]>([1, 1500, 512])];
+            tensor<fp16, [1, 1500, 8, 64]> var_670_cast_fp16 = transpose(perm = var_669, x = var_668_cast_fp16)[name = string("transpose_36")];
+            tensor<fp16, [1, 1500, 512]> x_71_cast_fp16 = reshape(shape = concat_5, x = var_670_cast_fp16)[name = string("x_71_cast_fp16")];
+            tensor<fp16, [512, 512]> var_674_to_fp16 = const()[name = string("op_674_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36459072)))];
+            tensor<fp16, [512]> var_675_to_fp16 = const()[name = string("op_675_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36983424)))];
+            tensor<fp16, [1, 1500, 512]> linear_33_cast_fp16 = linear(bias = var_675_to_fp16, weight = var_674_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<int32, [1]> var_682_axes_0 = const()[name = string("op_682_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36984512)))];
+            tensor<fp16, [512]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36985600)))];
+            tensor<fp16, [1, 1500, 512]> var_682_cast_fp16 = layer_norm(axes = var_682_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_682_cast_fp16")];
+            tensor<fp16, [2048, 512]> var_691_to_fp16 = const()[name = string("op_691_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36986688)))];
+            tensor<fp16, [2048]> var_692_to_fp16 = const()[name = string("op_692_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39083904)))];
+            tensor<fp16, [1, 1500, 2048]> linear_34_cast_fp16 = linear(bias = var_692_to_fp16, weight = var_691_to_fp16, x = var_682_cast_fp16)[name = string("linear_34_cast_fp16")];
+            string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 2048]> x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")];
+            tensor<fp16, [512, 2048]> var_697_to_fp16 = const()[name = string("op_697_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39088064)))];
+            tensor<fp16, [512]> var_698_to_fp16 = const()[name = string("op_698_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41185280)))];
+            tensor<fp16, [1, 1500, 512]> linear_35_cast_fp16 = linear(bias = var_698_to_fp16, weight = var_697_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, 1500, 512]> x_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_cast_fp16")];
+            tensor<int32, [1]> var_711_axes_0 = const()[name = string("op_711_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [512]> ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41186368)))];
+            tensor<fp16, [512]> ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41187456)))];
+            fp16 var_702_to_fp16 = const()[name = string("op_702_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 512]> output = layer_norm(axes = var_711_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_702_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_711_cast_fp16")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/base/encoder.mlmodelc/weights/weight.bin b/base/encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9d9f2ba5d1a4cbde8e0e1b8f185ff5b7be0ce74d
--- /dev/null
+++ b/base/encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c45bee989219532c4cec616d439c51f280ac9d7b04f7847c4b7d7daba1d47523
+size 41188544
diff --git a/base/model_dims.json b/base/model_dims.json
new file mode 100644
index 0000000000000000000000000000000000000000..749969fc8563817aae59f05daa17e1a062f383a4
--- /dev/null
+++ b/base/model_dims.json
@@ -0,0 +1,12 @@
+{
+  "n_mels": 80,
+  "n_audio_ctx": 1500,
+  "n_audio_state": 512,
+  "n_audio_head": 8,
+  "n_audio_layer": 6,
+  "n_vocab": 51865,
+  "n_text_ctx": 448,
+  "n_text_state": 512,
+  "n_text_head": 8,
+  "n_text_layer": 6
+}
\ No newline at end of file
diff --git a/compile_model.sh b/compile_model.sh
new file mode 100755
index 0000000000000000000000000000000000000000..8b92a248c93902fa29db67e23daa719b5bdb433b
--- /dev/null
+++ b/compile_model.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+for d in work/*
+do
+    echo $d
+    pushd $d >/dev/null
+
+    if [ -d encoder ]; then
+        xcrun coremlcompiler compile encoder/chunked_pipeline.mlpackage .
+        rm -rf encoder.mlmodelc
+        mv chunked_pipeline.mlmodelc encoder.mlmodelc
+    else
+        xcrun coremlcompiler compile encoder.mlpackage .
+    fi
+    xcrun coremlcompiler compile decoder_first.mlpackage .
+    xcrun coremlcompiler compile decoder_second.mlpackage .
+
+    popd >/dev/null
+done
+
+mkdir -p output
+for d in work/*
+do
+    out=${d/work/output}
+    mkdir -p $out
+    mv $d/*.mlmodelc $d/model_dims.json $out/
+done
+
+mkdir -p index
+for d in output/*
+do
+    model=${d##*/}
+    (cd $d && find * -type f) > index/$model
+done
\ No newline at end of file
diff --git a/index/base b/index/base
new file mode 100644
index 0000000000000000000000000000000000000000..8d76ac58164add489901c566cff97bec4d955519
--- /dev/null
+++ b/index/base
@@ -0,0 +1,16 @@
+decoder_first.mlmodelc/weights/weight.bin
+decoder_first.mlmodelc/metadata.json
+decoder_first.mlmodelc/model.mil
+decoder_first.mlmodelc/coremldata.bin
+decoder_first.mlmodelc/analytics/coremldata.bin
+decoder_second.mlmodelc/weights/weight.bin
+decoder_second.mlmodelc/metadata.json
+decoder_second.mlmodelc/model.mil
+decoder_second.mlmodelc/coremldata.bin
+decoder_second.mlmodelc/analytics/coremldata.bin
+encoder.mlmodelc/weights/weight.bin
+encoder.mlmodelc/metadata.json
+encoder.mlmodelc/model.mil
+encoder.mlmodelc/coremldata.bin
+encoder.mlmodelc/analytics/coremldata.bin
+model_dims.json
diff --git a/index/large-v2 b/index/large-v2
new file mode 100644
index 0000000000000000000000000000000000000000..e9441a23015009aced8ebd0cb6b7d41f2f6318bb
--- /dev/null
+++ b/index/large-v2
@@ -0,0 +1,22 @@
+decoder_first.mlmodelc/weights/weight.bin
+decoder_first.mlmodelc/metadata.json
+decoder_first.mlmodelc/model.mil
+decoder_first.mlmodelc/coremldata.bin
+decoder_first.mlmodelc/analytics/coremldata.bin
+decoder_second.mlmodelc/weights/weight.bin
+decoder_second.mlmodelc/metadata.json
+decoder_second.mlmodelc/model.mil
+decoder_second.mlmodelc/coremldata.bin
+decoder_second.mlmodelc/analytics/coremldata.bin
+encoder.mlmodelc/metadata.json
+encoder.mlmodelc/model0/weights/0-weight.bin
+encoder.mlmodelc/model0/model.mil
+encoder.mlmodelc/model0/coremldata.bin
+encoder.mlmodelc/model0/analytics/coremldata.bin
+encoder.mlmodelc/model1/weights/1-weight.bin
+encoder.mlmodelc/model1/model.mil
+encoder.mlmodelc/model1/coremldata.bin
+encoder.mlmodelc/model1/analytics/coremldata.bin
+encoder.mlmodelc/coremldata.bin
+encoder.mlmodelc/analytics/coremldata.bin
+model_dims.json
diff --git a/index/large-v3 b/index/large-v3
new file mode 100644
index 0000000000000000000000000000000000000000..e9441a23015009aced8ebd0cb6b7d41f2f6318bb
--- /dev/null
+++ b/index/large-v3
@@ -0,0 +1,22 @@
+decoder_first.mlmodelc/weights/weight.bin
+decoder_first.mlmodelc/metadata.json
+decoder_first.mlmodelc/model.mil
+decoder_first.mlmodelc/coremldata.bin
+decoder_first.mlmodelc/analytics/coremldata.bin
+decoder_second.mlmodelc/weights/weight.bin
+decoder_second.mlmodelc/metadata.json
+decoder_second.mlmodelc/model.mil
+decoder_second.mlmodelc/coremldata.bin
+decoder_second.mlmodelc/analytics/coremldata.bin
+encoder.mlmodelc/metadata.json
+encoder.mlmodelc/model0/weights/0-weight.bin
+encoder.mlmodelc/model0/model.mil
+encoder.mlmodelc/model0/coremldata.bin
+encoder.mlmodelc/model0/analytics/coremldata.bin
+encoder.mlmodelc/model1/weights/1-weight.bin
+encoder.mlmodelc/model1/model.mil
+encoder.mlmodelc/model1/coremldata.bin
+encoder.mlmodelc/model1/analytics/coremldata.bin
+encoder.mlmodelc/coremldata.bin
+encoder.mlmodelc/analytics/coremldata.bin
+model_dims.json
diff --git a/index/medium b/index/medium
new file mode 100644
index 0000000000000000000000000000000000000000..8d76ac58164add489901c566cff97bec4d955519
--- /dev/null
+++ b/index/medium
@@ -0,0 +1,16 @@
+decoder_first.mlmodelc/weights/weight.bin
+decoder_first.mlmodelc/metadata.json
+decoder_first.mlmodelc/model.mil
+decoder_first.mlmodelc/coremldata.bin
+decoder_first.mlmodelc/analytics/coremldata.bin
+decoder_second.mlmodelc/weights/weight.bin
+decoder_second.mlmodelc/metadata.json
+decoder_second.mlmodelc/model.mil
+decoder_second.mlmodelc/coremldata.bin
+decoder_second.mlmodelc/analytics/coremldata.bin
+encoder.mlmodelc/weights/weight.bin
+encoder.mlmodelc/metadata.json
+encoder.mlmodelc/model.mil
+encoder.mlmodelc/coremldata.bin
+encoder.mlmodelc/analytics/coremldata.bin
+model_dims.json
diff --git a/index/small b/index/small
new file mode 100644
index 0000000000000000000000000000000000000000..8d76ac58164add489901c566cff97bec4d955519
--- /dev/null
+++ b/index/small
@@ -0,0 +1,16 @@
+decoder_first.mlmodelc/weights/weight.bin
+decoder_first.mlmodelc/metadata.json
+decoder_first.mlmodelc/model.mil
+decoder_first.mlmodelc/coremldata.bin
+decoder_first.mlmodelc/analytics/coremldata.bin
+decoder_second.mlmodelc/weights/weight.bin
+decoder_second.mlmodelc/metadata.json
+decoder_second.mlmodelc/model.mil
+decoder_second.mlmodelc/coremldata.bin
+decoder_second.mlmodelc/analytics/coremldata.bin
+encoder.mlmodelc/weights/weight.bin
+encoder.mlmodelc/metadata.json
+encoder.mlmodelc/model.mil
+encoder.mlmodelc/coremldata.bin
+encoder.mlmodelc/analytics/coremldata.bin
+model_dims.json
diff --git a/index/tiny b/index/tiny
new file mode 100644
index 0000000000000000000000000000000000000000..8d76ac58164add489901c566cff97bec4d955519
--- /dev/null
+++ b/index/tiny
@@ -0,0 +1,16 @@
+decoder_first.mlmodelc/weights/weight.bin
+decoder_first.mlmodelc/metadata.json
+decoder_first.mlmodelc/model.mil
+decoder_first.mlmodelc/coremldata.bin
+decoder_first.mlmodelc/analytics/coremldata.bin
+decoder_second.mlmodelc/weights/weight.bin
+decoder_second.mlmodelc/metadata.json
+decoder_second.mlmodelc/model.mil
+decoder_second.mlmodelc/coremldata.bin
+decoder_second.mlmodelc/analytics/coremldata.bin
+encoder.mlmodelc/weights/weight.bin
+encoder.mlmodelc/metadata.json
+encoder.mlmodelc/model.mil
+encoder.mlmodelc/coremldata.bin
+encoder.mlmodelc/analytics/coremldata.bin
+model_dims.json
diff --git a/large-v2/decoder_first.mlmodelc/analytics/coremldata.bin b/large-v2/decoder_first.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b3502c1971106c8ddba15a6d19cbe212e9040b51
--- /dev/null
+++ b/large-v2/decoder_first.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a564dfd67cfcb3c0ee8cd9f7ef9f303fbfc561e635709bd3a46c5870571079de
+size 243
diff --git a/large-v2/decoder_first.mlmodelc/coremldata.bin b/large-v2/decoder_first.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..78fa71791f49b098c63687ec844348e5cd25cd92
--- /dev/null
+++ b/large-v2/decoder_first.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6214be9e110a102836fb1fdb960a2fb564e60f5d9e3d1e25a9b7f978309480e
+size 453
diff --git a/large-v2/decoder_first.mlmodelc/metadata.json b/large-v2/decoder_first.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..62548d3742d04f712f1bad76294f859bb5029d22
--- /dev/null
+++ b/large-v2/decoder_first.mlmodelc/metadata.json
@@ -0,0 +1,106 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "dummy",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.writeState" : 66,
+      "Shape" : 64,
+      "Ios18.linear" : 64,
+      "Identity" : 1,
+      "Ios18.gather" : 64,
+      "Ios18.concat" : 64,
+      "Ios18.sliceUpdate" : 66,
+      "Ios18.cast" : 128,
+      "Ios18.expandDims" : 64,
+      "Ios18.readState" : 66
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 448 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 448, 1280]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 448 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 448, 1280]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 1500, 1280]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 1500, 1280]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...1500 × 1280",
+        "shapeRange" : "[[1, 1], [1, 1500], [1280, 1280]]",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1280)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1, 1280]",
+        "name" : "audio_data",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_first",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/large-v2/decoder_first.mlmodelc/model.mil b/large-v2/decoder_first.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..5e9505ec80acb3d396de560006ff76f4da79cc6a
--- /dev/null
+++ b/large-v2/decoder_first.mlmodelc/model.mil
@@ -0,0 +1,1851 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, ?, 1280]> audio_data, state<tensor<fp16, [32, 1, 448, 1280]>> k_cache1, state<tensor<fp16, [32, 1, 1500, 1280]>> k_cache2, state<tensor<fp16, [32, 1, 448, 1280]>> v_cache1, state<tensor<fp16, [32, 1, 1500, 1280]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 1280]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [1280, 1280]]}})))] {
+            tensor<fp16, [1, ?, 1280]> dummy = identity(x = audio_data)[name = string("identity_0")];
+            tensor<fp16, [32, 1, 448, 1280]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor<fp16, [32, 1, 448, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<fp16, [32, 1, 1500, 1280]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<fp16, [1280, 1280]> var_131_to_fp16 = const()[name = string("op_131_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36700288)))];
+            tensor<fp16, [1280]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39977152)))];
+            tensor<fp16, [1, ?, 1280]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_131_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_135_to_fp16 = const()[name = string("op_135_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39979776)))];
+            tensor<fp16, [1280]> var_136_to_fp16 = const()[name = string("op_136_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43256640)))];
+            tensor<fp16, [1, ?, 1280]> linear_1_cast_fp16 = linear(bias = var_136_to_fp16, weight = var_135_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [3]> var_138_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_138_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_138_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_138_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [3]> var_138_shape_cast_fp16_to_int16 = cast(dtype = var_138_shape_cast_fp16_to_int16_dtype_0, x = var_138_shape_cast_fp16)[name = string("cast_199")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_138_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_198")];
+            tensor<int32, [1]> expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")];
+            tensor<int32, [4]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
+            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_68_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_68 = read_state(input = k_cache2)[name = string("coreml_update_state_68")];
+            tensor<int32, [3]> var_143_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_143_shape_cast_fp16")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_143_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_143_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_143_shape_cast_fp16_to_uint16 = cast(dtype = var_143_shape_cast_fp16_to_uint16_dtype_0, x = var_143_shape_cast_fp16)[name = string("cast_197")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_143_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_196")];
+            tensor<int32, [1]> expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")];
+            tensor<int32, [4]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)];
+            bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_69_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_69 = read_state(input = v_cache2)[name = string("coreml_update_state_69")];
+            tensor<fp16, [1280, 1280]> var_165_to_fp16 = const()[name = string("op_165_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43259264)))];
+            tensor<fp16, [1, ?, 1280]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_165_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_169_to_fp16 = const()[name = string("op_169_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46536128)))];
+            tensor<fp16, [1280]> var_170_to_fp16 = const()[name = string("op_170_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49812992)))];
+            tensor<fp16, [1, ?, 1280]> linear_3_cast_fp16 = linear(bias = var_170_to_fp16, weight = var_169_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")];
+            tensor<int32, [3]> var_172_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_172_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_172_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_172_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_172_shape_cast_fp16_to_uint16 = cast(dtype = var_172_shape_cast_fp16_to_uint16_dtype_0, x = var_172_shape_cast_fp16)[name = string("cast_195")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_172_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_194")];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")];
+            tensor<int32, [4]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)];
+            bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_68)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_70_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_70 = read_state(input = k_cache2)[name = string("coreml_update_state_70")];
+            tensor<int32, [3]> var_177_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_177_shape_cast_fp16")];
+            int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)];
+            int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)];
+            bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)];
+            string var_177_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_177_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_177_shape_cast_fp16_to_uint16 = cast(dtype = var_177_shape_cast_fp16_to_uint16_dtype_0, x = var_177_shape_cast_fp16)[name = string("cast_193")];
+            uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_177_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")];
+            string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_192")];
+            tensor<int32, [1]> expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")];
+            tensor<int32, [4]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_69)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_71_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_71 = read_state(input = v_cache2)[name = string("coreml_update_state_71")];
+            tensor<fp16, [1280, 1280]> var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49815616)))];
+            tensor<fp16, [1, ?, 1280]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_199_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_203_to_fp16 = const()[name = string("op_203_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53092480)))];
+            tensor<fp16, [1280]> var_204_to_fp16 = const()[name = string("op_204_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56369344)))];
+            tensor<fp16, [1, ?, 1280]> linear_5_cast_fp16 = linear(bias = var_204_to_fp16, weight = var_203_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")];
+            tensor<int32, [3]> var_206_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_206_shape_cast_fp16")];
+            int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)];
+            int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)];
+            bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)];
+            string var_206_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_206_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_206_shape_cast_fp16_to_uint16 = cast(dtype = var_206_shape_cast_fp16_to_uint16_dtype_0, x = var_206_shape_cast_fp16)[name = string("cast_191")];
+            uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_206_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")];
+            string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_190")];
+            tensor<int32, [1]> expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")];
+            tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)];
+            bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_70)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_72_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_72 = read_state(input = k_cache2)[name = string("coreml_update_state_72")];
+            tensor<int32, [3]> var_211_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_211_shape_cast_fp16")];
+            int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)];
+            int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)];
+            bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)];
+            string var_211_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_211_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_211_shape_cast_fp16_to_uint16 = cast(dtype = var_211_shape_cast_fp16_to_uint16_dtype_0, x = var_211_shape_cast_fp16)[name = string("cast_189")];
+            uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_211_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")];
+            string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_188")];
+            tensor<int32, [1]> expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")];
+            tensor<int32, [4]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)];
+            bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_71)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_73_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_73 = read_state(input = v_cache2)[name = string("coreml_update_state_73")];
+            tensor<fp16, [1280, 1280]> var_233_to_fp16 = const()[name = string("op_233_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56371968)))];
+            tensor<fp16, [1, ?, 1280]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_233_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_237_to_fp16 = const()[name = string("op_237_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59648832)))];
+            tensor<fp16, [1280]> var_238_to_fp16 = const()[name = string("op_238_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62925696)))];
+            tensor<fp16, [1, ?, 1280]> linear_7_cast_fp16 = linear(bias = var_238_to_fp16, weight = var_237_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")];
+            tensor<int32, [3]> var_240_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_240_shape_cast_fp16")];
+            int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)];
+            int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)];
+            bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)];
+            string var_240_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_240_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_240_shape_cast_fp16_to_uint16 = cast(dtype = var_240_shape_cast_fp16_to_uint16_dtype_0, x = var_240_shape_cast_fp16)[name = string("cast_187")];
+            uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_240_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")];
+            string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_186")];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")];
+            tensor<int32, [4]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)];
+            bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_72)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_74_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_74 = read_state(input = k_cache2)[name = string("coreml_update_state_74")];
+            tensor<int32, [3]> var_245_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_245_shape_cast_fp16")];
+            int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)];
+            int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)];
+            bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)];
+            string var_245_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_245_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_245_shape_cast_fp16_to_uint16 = cast(dtype = var_245_shape_cast_fp16_to_uint16_dtype_0, x = var_245_shape_cast_fp16)[name = string("cast_185")];
+            uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_245_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")];
+            string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_184")];
+            tensor<int32, [1]> expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")];
+            tensor<int32, [4]> concat_26 = const()[name = string("concat_26"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_73)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_75_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_75 = read_state(input = v_cache2)[name = string("coreml_update_state_75")];
+            tensor<fp16, [1280, 1280]> var_267_to_fp16 = const()[name = string("op_267_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62928320)))];
+            tensor<fp16, [1, ?, 1280]> linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_267_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_271_to_fp16 = const()[name = string("op_271_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66205184)))];
+            tensor<fp16, [1280]> var_272_to_fp16 = const()[name = string("op_272_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69482048)))];
+            tensor<fp16, [1, ?, 1280]> linear_9_cast_fp16 = linear(bias = var_272_to_fp16, weight = var_271_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")];
+            tensor<int32, [3]> var_274_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_274_shape_cast_fp16")];
+            int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)];
+            int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)];
+            bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)];
+            string var_274_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_274_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_274_shape_cast_fp16_to_uint16 = cast(dtype = var_274_shape_cast_fp16_to_uint16_dtype_0, x = var_274_shape_cast_fp16)[name = string("cast_183")];
+            uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_274_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")];
+            string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_182")];
+            tensor<int32, [1]> expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")];
+            tensor<int32, [4]> concat_29 = const()[name = string("concat_29"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)];
+            bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_74)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_76_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_76 = read_state(input = k_cache2)[name = string("coreml_update_state_76")];
+            tensor<int32, [3]> var_279_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_279_shape_cast_fp16")];
+            int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)];
+            int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)];
+            bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)];
+            string var_279_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_279_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_279_shape_cast_fp16_to_uint16 = cast(dtype = var_279_shape_cast_fp16_to_uint16_dtype_0, x = var_279_shape_cast_fp16)[name = string("cast_181")];
+            uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_279_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")];
+            string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_180")];
+            tensor<int32, [1]> expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")];
+            tensor<int32, [4]> concat_32 = const()[name = string("concat_32"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)];
+            bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_75)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_77_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_77 = read_state(input = v_cache2)[name = string("coreml_update_state_77")];
+            tensor<fp16, [1280, 1280]> var_301_to_fp16 = const()[name = string("op_301_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69484672)))];
+            tensor<fp16, [1, ?, 1280]> linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_301_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_305_to_fp16 = const()[name = string("op_305_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72761536)))];
+            tensor<fp16, [1280]> var_306_to_fp16 = const()[name = string("op_306_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76038400)))];
+            tensor<fp16, [1, ?, 1280]> linear_11_cast_fp16 = linear(bias = var_306_to_fp16, weight = var_305_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")];
+            tensor<int32, [3]> var_308_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_308_shape_cast_fp16")];
+            int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)];
+            int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)];
+            bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)];
+            string var_308_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_308_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_308_shape_cast_fp16_to_uint16 = cast(dtype = var_308_shape_cast_fp16_to_uint16_dtype_0, x = var_308_shape_cast_fp16)[name = string("cast_179")];
+            uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_308_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")];
+            string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_178")];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")];
+            tensor<int32, [4]> concat_35 = const()[name = string("concat_35"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)];
+            bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_76)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_78_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_78 = read_state(input = k_cache2)[name = string("coreml_update_state_78")];
+            tensor<int32, [3]> var_313_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_313_shape_cast_fp16")];
+            int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)];
+            int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)];
+            bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)];
+            string var_313_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_313_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_313_shape_cast_fp16_to_uint16 = cast(dtype = var_313_shape_cast_fp16_to_uint16_dtype_0, x = var_313_shape_cast_fp16)[name = string("cast_177")];
+            uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_313_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")];
+            string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_176")];
+            tensor<int32, [1]> expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")];
+            tensor<int32, [4]> concat_38 = const()[name = string("concat_38"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
+            bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_77)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_79_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_79 = read_state(input = v_cache2)[name = string("coreml_update_state_79")];
+            tensor<fp16, [1280, 1280]> var_335_to_fp16 = const()[name = string("op_335_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76041024)))];
+            tensor<fp16, [1, ?, 1280]> linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_335_to_fp16, x = audio_data)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_339_to_fp16 = const()[name = string("op_339_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79317888)))];
+            tensor<fp16, [1280]> var_340_to_fp16 = const()[name = string("op_340_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82594752)))];
+            tensor<fp16, [1, ?, 1280]> linear_13_cast_fp16 = linear(bias = var_340_to_fp16, weight = var_339_to_fp16, x = audio_data)[name = string("linear_13_cast_fp16")];
+            tensor<int32, [3]> var_342_shape_cast_fp16 = shape(x = linear_12_cast_fp16)[name = string("op_342_shape_cast_fp16")];
+            int32 gather_12_axis_0 = const()[name = string("gather_12_axis_0"), val = int32(0)];
+            int32 gather_12_batch_dims_0 = const()[name = string("gather_12_batch_dims_0"), val = int32(0)];
+            bool gather_12_validate_indices_0 = const()[name = string("gather_12_validate_indices_0"), val = bool(false)];
+            string var_342_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_342_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_12_to_uint16 = const()[name = string("select_12_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_342_shape_cast_fp16_to_uint16 = cast(dtype = var_342_shape_cast_fp16_to_uint16_dtype_0, x = var_342_shape_cast_fp16)[name = string("cast_175")];
+            uint16 gather_12_cast_uint16 = gather(axis = gather_12_axis_0, batch_dims = gather_12_batch_dims_0, indices = select_12_to_uint16, validate_indices = gather_12_validate_indices_0, x = var_342_shape_cast_fp16_to_uint16)[name = string("gather_12_cast_uint16")];
+            string gather_12_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_12_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_59_axes_0 = const()[name = string("expand_dims_59_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_12_cast_uint16_to_int32 = cast(dtype = gather_12_cast_uint16_to_int32_dtype_0, x = gather_12_cast_uint16)[name = string("cast_174")];
+            tensor<int32, [1]> expand_dims_59 = expand_dims(axes = expand_dims_59_axes_0, x = gather_12_cast_uint16_to_int32)[name = string("expand_dims_59")];
+            tensor<int32, [4]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [1]> concat_42_values0_0 = const()[name = string("concat_42_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_42_values1_0 = const()[name = string("concat_42_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_42_values3_0 = const()[name = string("concat_42_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)];
+            bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (concat_42_values0_0, concat_42_values1_0, expand_dims_59, concat_42_values3_0))[name = string("concat_42")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_41, begin_mask = k_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_42, end_mask = k_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_7_stride_0, update = linear_12_cast_fp16, x = coreml_update_state_78)[name = string("k_cache2_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_7_cast_fp16, input = k_cache2)[name = string("coreml_update_state_80_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_80 = read_state(input = k_cache2)[name = string("coreml_update_state_80")];
+            tensor<int32, [3]> var_347_shape_cast_fp16 = shape(x = linear_13_cast_fp16)[name = string("op_347_shape_cast_fp16")];
+            int32 gather_13_axis_0 = const()[name = string("gather_13_axis_0"), val = int32(0)];
+            int32 gather_13_batch_dims_0 = const()[name = string("gather_13_batch_dims_0"), val = int32(0)];
+            bool gather_13_validate_indices_0 = const()[name = string("gather_13_validate_indices_0"), val = bool(false)];
+            string var_347_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_347_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_13_to_uint16 = const()[name = string("select_13_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_347_shape_cast_fp16_to_uint16 = cast(dtype = var_347_shape_cast_fp16_to_uint16_dtype_0, x = var_347_shape_cast_fp16)[name = string("cast_173")];
+            uint16 gather_13_cast_uint16 = gather(axis = gather_13_axis_0, batch_dims = gather_13_batch_dims_0, indices = select_13_to_uint16, validate_indices = gather_13_validate_indices_0, x = var_347_shape_cast_fp16_to_uint16)[name = string("gather_13_cast_uint16")];
+            string gather_13_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_13_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_63_axes_0 = const()[name = string("expand_dims_63_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_13_cast_uint16_to_int32 = cast(dtype = gather_13_cast_uint16_to_int32_dtype_0, x = gather_13_cast_uint16)[name = string("cast_172")];
+            tensor<int32, [1]> expand_dims_63 = expand_dims(axes = expand_dims_63_axes_0, x = gather_13_cast_uint16_to_int32)[name = string("expand_dims_63")];
+            tensor<int32, [4]> concat_44 = const()[name = string("concat_44"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [1]> concat_45_values0_0 = const()[name = string("concat_45_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_45_values1_0 = const()[name = string("concat_45_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_45_values3_0 = const()[name = string("concat_45_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_45_axis_0 = const()[name = string("concat_45_axis_0"), val = int32(0)];
+            bool concat_45_interleave_0 = const()[name = string("concat_45_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_45 = concat(axis = concat_45_axis_0, interleave = concat_45_interleave_0, values = (concat_45_values0_0, concat_45_values1_0, expand_dims_63, concat_45_values3_0))[name = string("concat_45")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_44, begin_mask = v_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_45, end_mask = v_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_7_stride_0, update = linear_13_cast_fp16, x = coreml_update_state_79)[name = string("v_cache2_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_7_cast_fp16, input = v_cache2)[name = string("coreml_update_state_81_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_81 = read_state(input = v_cache2)[name = string("coreml_update_state_81")];
+            tensor<fp16, [1280, 1280]> var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82597376)))];
+            tensor<fp16, [1, ?, 1280]> linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_369_to_fp16, x = audio_data)[name = string("linear_14_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_373_to_fp16 = const()[name = string("op_373_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85874240)))];
+            tensor<fp16, [1280]> var_374_to_fp16 = const()[name = string("op_374_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89151104)))];
+            tensor<fp16, [1, ?, 1280]> linear_15_cast_fp16 = linear(bias = var_374_to_fp16, weight = var_373_to_fp16, x = audio_data)[name = string("linear_15_cast_fp16")];
+            tensor<int32, [3]> var_376_shape_cast_fp16 = shape(x = linear_14_cast_fp16)[name = string("op_376_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_376_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_376_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_376_shape_cast_fp16_to_uint16 = cast(dtype = var_376_shape_cast_fp16_to_uint16_dtype_0, x = var_376_shape_cast_fp16)[name = string("cast_171")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_376_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_170")];
+            tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = gather_14_cast_uint16_to_int32)[name = string("expand_dims_67")];
+            tensor<int32, [4]> concat_47 = const()[name = string("concat_47"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_48_values1_0 = const()[name = string("concat_48_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_48_values3_0 = const()[name = string("concat_48_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, concat_48_values1_0, expand_dims_67, concat_48_values3_0))[name = string("concat_48")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_47, begin_mask = k_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_48, end_mask = k_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_8_stride_0, update = linear_14_cast_fp16, x = coreml_update_state_80)[name = string("k_cache2_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_8_cast_fp16, input = k_cache2)[name = string("coreml_update_state_82_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_82 = read_state(input = k_cache2)[name = string("coreml_update_state_82")];
+            tensor<int32, [3]> var_381_shape_cast_fp16 = shape(x = linear_15_cast_fp16)[name = string("op_381_shape_cast_fp16")];
+            int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)];
+            int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)];
+            bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)];
+            string var_381_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_381_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_381_shape_cast_fp16_to_uint16 = cast(dtype = var_381_shape_cast_fp16_to_uint16_dtype_0, x = var_381_shape_cast_fp16)[name = string("cast_169")];
+            uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_381_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")];
+            string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_71_axes_0 = const()[name = string("expand_dims_71_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_168")];
+            tensor<int32, [1]> expand_dims_71 = expand_dims(axes = expand_dims_71_axes_0, x = gather_15_cast_uint16_to_int32)[name = string("expand_dims_71")];
+            tensor<int32, [4]> concat_50 = const()[name = string("concat_50"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [1]> concat_51_values0_0 = const()[name = string("concat_51_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)];
+            bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (concat_51_values0_0, concat_51_values1_0, expand_dims_71, concat_51_values3_0))[name = string("concat_51")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_50, begin_mask = v_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_51, end_mask = v_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_8_stride_0, update = linear_15_cast_fp16, x = coreml_update_state_81)[name = string("v_cache2_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_8_cast_fp16, input = v_cache2)[name = string("coreml_update_state_83_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_83 = read_state(input = v_cache2)[name = string("coreml_update_state_83")];
+            tensor<fp16, [1280, 1280]> var_403_to_fp16 = const()[name = string("op_403_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89153728)))];
+            tensor<fp16, [1, ?, 1280]> linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_403_to_fp16, x = audio_data)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_407_to_fp16 = const()[name = string("op_407_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92430592)))];
+            tensor<fp16, [1280]> var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95707456)))];
+            tensor<fp16, [1, ?, 1280]> linear_17_cast_fp16 = linear(bias = var_408_to_fp16, weight = var_407_to_fp16, x = audio_data)[name = string("linear_17_cast_fp16")];
+            tensor<int32, [3]> var_410_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_410_shape_cast_fp16")];
+            int32 gather_16_axis_0 = const()[name = string("gather_16_axis_0"), val = int32(0)];
+            int32 gather_16_batch_dims_0 = const()[name = string("gather_16_batch_dims_0"), val = int32(0)];
+            bool gather_16_validate_indices_0 = const()[name = string("gather_16_validate_indices_0"), val = bool(false)];
+            string var_410_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_410_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_16_to_uint16 = const()[name = string("select_16_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_410_shape_cast_fp16_to_uint16 = cast(dtype = var_410_shape_cast_fp16_to_uint16_dtype_0, x = var_410_shape_cast_fp16)[name = string("cast_167")];
+            uint16 gather_16_cast_uint16 = gather(axis = gather_16_axis_0, batch_dims = gather_16_batch_dims_0, indices = select_16_to_uint16, validate_indices = gather_16_validate_indices_0, x = var_410_shape_cast_fp16_to_uint16)[name = string("gather_16_cast_uint16")];
+            string gather_16_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_16_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_75_axes_0 = const()[name = string("expand_dims_75_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_16_cast_uint16_to_int32 = cast(dtype = gather_16_cast_uint16_to_int32_dtype_0, x = gather_16_cast_uint16)[name = string("cast_166")];
+            tensor<int32, [1]> expand_dims_75 = expand_dims(axes = expand_dims_75_axes_0, x = gather_16_cast_uint16_to_int32)[name = string("expand_dims_75")];
+            tensor<int32, [4]> concat_53 = const()[name = string("concat_53"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [1]> concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_54_values1_0 = const()[name = string("concat_54_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_54_values3_0 = const()[name = string("concat_54_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, concat_54_values1_0, expand_dims_75, concat_54_values3_0))[name = string("concat_54")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_53, begin_mask = k_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_54, end_mask = k_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_9_stride_0, update = linear_16_cast_fp16, x = coreml_update_state_82)[name = string("k_cache2_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_9_cast_fp16, input = k_cache2)[name = string("coreml_update_state_84_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_84 = read_state(input = k_cache2)[name = string("coreml_update_state_84")];
+            tensor<int32, [3]> var_415_shape_cast_fp16 = shape(x = linear_17_cast_fp16)[name = string("op_415_shape_cast_fp16")];
+            int32 gather_17_axis_0 = const()[name = string("gather_17_axis_0"), val = int32(0)];
+            int32 gather_17_batch_dims_0 = const()[name = string("gather_17_batch_dims_0"), val = int32(0)];
+            bool gather_17_validate_indices_0 = const()[name = string("gather_17_validate_indices_0"), val = bool(false)];
+            string var_415_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_415_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_17_to_uint16 = const()[name = string("select_17_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_415_shape_cast_fp16_to_uint16 = cast(dtype = var_415_shape_cast_fp16_to_uint16_dtype_0, x = var_415_shape_cast_fp16)[name = string("cast_165")];
+            uint16 gather_17_cast_uint16 = gather(axis = gather_17_axis_0, batch_dims = gather_17_batch_dims_0, indices = select_17_to_uint16, validate_indices = gather_17_validate_indices_0, x = var_415_shape_cast_fp16_to_uint16)[name = string("gather_17_cast_uint16")];
+            string gather_17_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_17_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_79_axes_0 = const()[name = string("expand_dims_79_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_17_cast_uint16_to_int32 = cast(dtype = gather_17_cast_uint16_to_int32_dtype_0, x = gather_17_cast_uint16)[name = string("cast_164")];
+            tensor<int32, [1]> expand_dims_79 = expand_dims(axes = expand_dims_79_axes_0, x = gather_17_cast_uint16_to_int32)[name = string("expand_dims_79")];
+            tensor<int32, [4]> concat_56 = const()[name = string("concat_56"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [1]> concat_57_values0_0 = const()[name = string("concat_57_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)];
+            bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (concat_57_values0_0, concat_57_values1_0, expand_dims_79, concat_57_values3_0))[name = string("concat_57")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = v_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = v_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_9_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_83)[name = string("v_cache2_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_9_cast_fp16, input = v_cache2)[name = string("coreml_update_state_85_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_85 = read_state(input = v_cache2)[name = string("coreml_update_state_85")];
+            tensor<fp16, [1280, 1280]> var_437_to_fp16 = const()[name = string("op_437_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95710080)))];
+            tensor<fp16, [1, ?, 1280]> linear_18_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_437_to_fp16, x = audio_data)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_441_to_fp16 = const()[name = string("op_441_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98986944)))];
+            tensor<fp16, [1280]> var_442_to_fp16 = const()[name = string("op_442_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102263808)))];
+            tensor<fp16, [1, ?, 1280]> linear_19_cast_fp16 = linear(bias = var_442_to_fp16, weight = var_441_to_fp16, x = audio_data)[name = string("linear_19_cast_fp16")];
+            tensor<int32, [3]> var_444_shape_cast_fp16 = shape(x = linear_18_cast_fp16)[name = string("op_444_shape_cast_fp16")];
+            int32 gather_18_axis_0 = const()[name = string("gather_18_axis_0"), val = int32(0)];
+            int32 gather_18_batch_dims_0 = const()[name = string("gather_18_batch_dims_0"), val = int32(0)];
+            bool gather_18_validate_indices_0 = const()[name = string("gather_18_validate_indices_0"), val = bool(false)];
+            string var_444_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_444_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_18_to_uint16 = const()[name = string("select_18_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_444_shape_cast_fp16_to_uint16 = cast(dtype = var_444_shape_cast_fp16_to_uint16_dtype_0, x = var_444_shape_cast_fp16)[name = string("cast_163")];
+            uint16 gather_18_cast_uint16 = gather(axis = gather_18_axis_0, batch_dims = gather_18_batch_dims_0, indices = select_18_to_uint16, validate_indices = gather_18_validate_indices_0, x = var_444_shape_cast_fp16_to_uint16)[name = string("gather_18_cast_uint16")];
+            string gather_18_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_18_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_18_cast_uint16_to_int32 = cast(dtype = gather_18_cast_uint16_to_int32_dtype_0, x = gather_18_cast_uint16)[name = string("cast_162")];
+            tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = gather_18_cast_uint16_to_int32)[name = string("expand_dims_83")];
+            tensor<int32, [4]> concat_59 = const()[name = string("concat_59"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [1]> concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, concat_60_values1_0, expand_dims_83, concat_60_values3_0))[name = string("concat_60")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_59, begin_mask = k_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_60, end_mask = k_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_10_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_84)[name = string("k_cache2_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_10_cast_fp16, input = k_cache2)[name = string("coreml_update_state_86_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_86 = read_state(input = k_cache2)[name = string("coreml_update_state_86")];
+            tensor<int32, [3]> var_449_shape_cast_fp16 = shape(x = linear_19_cast_fp16)[name = string("op_449_shape_cast_fp16")];
+            int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)];
+            int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)];
+            bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)];
+            string var_449_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_449_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_449_shape_cast_fp16_to_uint16 = cast(dtype = var_449_shape_cast_fp16_to_uint16_dtype_0, x = var_449_shape_cast_fp16)[name = string("cast_161")];
+            uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_449_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")];
+            string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_87_axes_0 = const()[name = string("expand_dims_87_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_160")];
+            tensor<int32, [1]> expand_dims_87 = expand_dims(axes = expand_dims_87_axes_0, x = gather_19_cast_uint16_to_int32)[name = string("expand_dims_87")];
+            tensor<int32, [4]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [1]> concat_63_values0_0 = const()[name = string("concat_63_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)];
+            bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (concat_63_values0_0, concat_63_values1_0, expand_dims_87, concat_63_values3_0))[name = string("concat_63")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_62, begin_mask = v_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_63, end_mask = v_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_10_stride_0, update = linear_19_cast_fp16, x = coreml_update_state_85)[name = string("v_cache2_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_10_cast_fp16, input = v_cache2)[name = string("coreml_update_state_87_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_87 = read_state(input = v_cache2)[name = string("coreml_update_state_87")];
+            tensor<fp16, [1280, 1280]> var_471_to_fp16 = const()[name = string("op_471_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102266432)))];
+            tensor<fp16, [1, ?, 1280]> linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_471_to_fp16, x = audio_data)[name = string("linear_20_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105543296)))];
+            tensor<fp16, [1280]> var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108820160)))];
+            tensor<fp16, [1, ?, 1280]> linear_21_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = audio_data)[name = string("linear_21_cast_fp16")];
+            tensor<int32, [3]> var_478_shape_cast_fp16 = shape(x = linear_20_cast_fp16)[name = string("op_478_shape_cast_fp16")];
+            int32 gather_20_axis_0 = const()[name = string("gather_20_axis_0"), val = int32(0)];
+            int32 gather_20_batch_dims_0 = const()[name = string("gather_20_batch_dims_0"), val = int32(0)];
+            bool gather_20_validate_indices_0 = const()[name = string("gather_20_validate_indices_0"), val = bool(false)];
+            string var_478_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_478_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_20_to_uint16 = const()[name = string("select_20_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_478_shape_cast_fp16_to_uint16 = cast(dtype = var_478_shape_cast_fp16_to_uint16_dtype_0, x = var_478_shape_cast_fp16)[name = string("cast_159")];
+            uint16 gather_20_cast_uint16 = gather(axis = gather_20_axis_0, batch_dims = gather_20_batch_dims_0, indices = select_20_to_uint16, validate_indices = gather_20_validate_indices_0, x = var_478_shape_cast_fp16_to_uint16)[name = string("gather_20_cast_uint16")];
+            string gather_20_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_20_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_91_axes_0 = const()[name = string("expand_dims_91_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_20_cast_uint16_to_int32 = cast(dtype = gather_20_cast_uint16_to_int32_dtype_0, x = gather_20_cast_uint16)[name = string("cast_158")];
+            tensor<int32, [1]> expand_dims_91 = expand_dims(axes = expand_dims_91_axes_0, x = gather_20_cast_uint16_to_int32)[name = string("expand_dims_91")];
+            tensor<int32, [4]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [1]> concat_66_values0_0 = const()[name = string("concat_66_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_66_values1_0 = const()[name = string("concat_66_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_66_values3_0 = const()[name = string("concat_66_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)];
+            bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (concat_66_values0_0, concat_66_values1_0, expand_dims_91, concat_66_values3_0))[name = string("concat_66")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_65, begin_mask = k_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_66, end_mask = k_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_11_stride_0, update = linear_20_cast_fp16, x = coreml_update_state_86)[name = string("k_cache2_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_11_cast_fp16, input = k_cache2)[name = string("coreml_update_state_88_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_88 = read_state(input = k_cache2)[name = string("coreml_update_state_88")];
+            tensor<int32, [3]> var_483_shape_cast_fp16 = shape(x = linear_21_cast_fp16)[name = string("op_483_shape_cast_fp16")];
+            int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)];
+            int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)];
+            bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)];
+            string var_483_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_483_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_483_shape_cast_fp16_to_uint16 = cast(dtype = var_483_shape_cast_fp16_to_uint16_dtype_0, x = var_483_shape_cast_fp16)[name = string("cast_157")];
+            uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_483_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")];
+            string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_95_axes_0 = const()[name = string("expand_dims_95_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_156")];
+            tensor<int32, [1]> expand_dims_95 = expand_dims(axes = expand_dims_95_axes_0, x = gather_21_cast_uint16_to_int32)[name = string("expand_dims_95")];
+            tensor<int32, [4]> concat_68 = const()[name = string("concat_68"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [1]> concat_69_values0_0 = const()[name = string("concat_69_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_69_values1_0 = const()[name = string("concat_69_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_69_values3_0 = const()[name = string("concat_69_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_69_axis_0 = const()[name = string("concat_69_axis_0"), val = int32(0)];
+            bool concat_69_interleave_0 = const()[name = string("concat_69_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_69 = concat(axis = concat_69_axis_0, interleave = concat_69_interleave_0, values = (concat_69_values0_0, concat_69_values1_0, expand_dims_95, concat_69_values3_0))[name = string("concat_69")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_68, begin_mask = v_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_69, end_mask = v_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_11_stride_0, update = linear_21_cast_fp16, x = coreml_update_state_87)[name = string("v_cache2_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_11_cast_fp16, input = v_cache2)[name = string("coreml_update_state_89_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_89 = read_state(input = v_cache2)[name = string("coreml_update_state_89")];
+            tensor<fp16, [1280, 1280]> var_505_to_fp16 = const()[name = string("op_505_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108822784)))];
+            tensor<fp16, [1, ?, 1280]> linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_505_to_fp16, x = audio_data)[name = string("linear_22_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_509_to_fp16 = const()[name = string("op_509_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112099648)))];
+            tensor<fp16, [1280]> var_510_to_fp16 = const()[name = string("op_510_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115376512)))];
+            tensor<fp16, [1, ?, 1280]> linear_23_cast_fp16 = linear(bias = var_510_to_fp16, weight = var_509_to_fp16, x = audio_data)[name = string("linear_23_cast_fp16")];
+            tensor<int32, [3]> var_512_shape_cast_fp16 = shape(x = linear_22_cast_fp16)[name = string("op_512_shape_cast_fp16")];
+            int32 gather_22_axis_0 = const()[name = string("gather_22_axis_0"), val = int32(0)];
+            int32 gather_22_batch_dims_0 = const()[name = string("gather_22_batch_dims_0"), val = int32(0)];
+            bool gather_22_validate_indices_0 = const()[name = string("gather_22_validate_indices_0"), val = bool(false)];
+            string var_512_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_512_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_22_to_uint16 = const()[name = string("select_22_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_512_shape_cast_fp16_to_uint16 = cast(dtype = var_512_shape_cast_fp16_to_uint16_dtype_0, x = var_512_shape_cast_fp16)[name = string("cast_155")];
+            uint16 gather_22_cast_uint16 = gather(axis = gather_22_axis_0, batch_dims = gather_22_batch_dims_0, indices = select_22_to_uint16, validate_indices = gather_22_validate_indices_0, x = var_512_shape_cast_fp16_to_uint16)[name = string("gather_22_cast_uint16")];
+            string gather_22_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_22_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_22_cast_uint16_to_int32 = cast(dtype = gather_22_cast_uint16_to_int32_dtype_0, x = gather_22_cast_uint16)[name = string("cast_154")];
+            tensor<int32, [1]> expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = gather_22_cast_uint16_to_int32)[name = string("expand_dims_99")];
+            tensor<int32, [4]> concat_71 = const()[name = string("concat_71"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [1]> concat_72_values0_0 = const()[name = string("concat_72_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_72_values1_0 = const()[name = string("concat_72_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_72_values3_0 = const()[name = string("concat_72_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)];
+            bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (concat_72_values0_0, concat_72_values1_0, expand_dims_99, concat_72_values3_0))[name = string("concat_72")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_71, begin_mask = k_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_72, end_mask = k_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_12_stride_0, update = linear_22_cast_fp16, x = coreml_update_state_88)[name = string("k_cache2_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_12_cast_fp16, input = k_cache2)[name = string("coreml_update_state_90_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_90 = read_state(input = k_cache2)[name = string("coreml_update_state_90")];
+            tensor<int32, [3]> var_517_shape_cast_fp16 = shape(x = linear_23_cast_fp16)[name = string("op_517_shape_cast_fp16")];
+            int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)];
+            int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)];
+            bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)];
+            string var_517_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_517_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_517_shape_cast_fp16_to_uint16 = cast(dtype = var_517_shape_cast_fp16_to_uint16_dtype_0, x = var_517_shape_cast_fp16)[name = string("cast_153")];
+            uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_517_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")];
+            string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_103_axes_0 = const()[name = string("expand_dims_103_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_152")];
+            tensor<int32, [1]> expand_dims_103 = expand_dims(axes = expand_dims_103_axes_0, x = gather_23_cast_uint16_to_int32)[name = string("expand_dims_103")];
+            tensor<int32, [4]> concat_74 = const()[name = string("concat_74"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [1]> concat_75_values0_0 = const()[name = string("concat_75_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)];
+            bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (concat_75_values0_0, concat_75_values1_0, expand_dims_103, concat_75_values3_0))[name = string("concat_75")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_74, begin_mask = v_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_75, end_mask = v_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_12_stride_0, update = linear_23_cast_fp16, x = coreml_update_state_89)[name = string("v_cache2_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_12_cast_fp16, input = v_cache2)[name = string("coreml_update_state_91_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_91 = read_state(input = v_cache2)[name = string("coreml_update_state_91")];
+            tensor<fp16, [1280, 1280]> var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115379136)))];
+            tensor<fp16, [1, ?, 1280]> linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_539_to_fp16, x = audio_data)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_543_to_fp16 = const()[name = string("op_543_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118656000)))];
+            tensor<fp16, [1280]> var_544_to_fp16 = const()[name = string("op_544_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121932864)))];
+            tensor<fp16, [1, ?, 1280]> linear_25_cast_fp16 = linear(bias = var_544_to_fp16, weight = var_543_to_fp16, x = audio_data)[name = string("linear_25_cast_fp16")];
+            tensor<int32, [3]> var_546_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_546_shape_cast_fp16")];
+            int32 gather_24_axis_0 = const()[name = string("gather_24_axis_0"), val = int32(0)];
+            int32 gather_24_batch_dims_0 = const()[name = string("gather_24_batch_dims_0"), val = int32(0)];
+            bool gather_24_validate_indices_0 = const()[name = string("gather_24_validate_indices_0"), val = bool(false)];
+            string var_546_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_546_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_24_to_uint16 = const()[name = string("select_24_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_546_shape_cast_fp16_to_uint16 = cast(dtype = var_546_shape_cast_fp16_to_uint16_dtype_0, x = var_546_shape_cast_fp16)[name = string("cast_151")];
+            uint16 gather_24_cast_uint16 = gather(axis = gather_24_axis_0, batch_dims = gather_24_batch_dims_0, indices = select_24_to_uint16, validate_indices = gather_24_validate_indices_0, x = var_546_shape_cast_fp16_to_uint16)[name = string("gather_24_cast_uint16")];
+            string gather_24_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_24_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_107_axes_0 = const()[name = string("expand_dims_107_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_24_cast_uint16_to_int32 = cast(dtype = gather_24_cast_uint16_to_int32_dtype_0, x = gather_24_cast_uint16)[name = string("cast_150")];
+            tensor<int32, [1]> expand_dims_107 = expand_dims(axes = expand_dims_107_axes_0, x = gather_24_cast_uint16_to_int32)[name = string("expand_dims_107")];
+            tensor<int32, [4]> concat_77 = const()[name = string("concat_77"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [1]> concat_78_values0_0 = const()[name = string("concat_78_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_78_values1_0 = const()[name = string("concat_78_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_78_values3_0 = const()[name = string("concat_78_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)];
+            bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (concat_78_values0_0, concat_78_values1_0, expand_dims_107, concat_78_values3_0))[name = string("concat_78")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_77, begin_mask = k_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_78, end_mask = k_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_13_stride_0, update = linear_24_cast_fp16, x = coreml_update_state_90)[name = string("k_cache2_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_13_cast_fp16, input = k_cache2)[name = string("coreml_update_state_92_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_92 = read_state(input = k_cache2)[name = string("coreml_update_state_92")];
+            tensor<int32, [3]> var_551_shape_cast_fp16 = shape(x = linear_25_cast_fp16)[name = string("op_551_shape_cast_fp16")];
+            int32 gather_25_axis_0 = const()[name = string("gather_25_axis_0"), val = int32(0)];
+            int32 gather_25_batch_dims_0 = const()[name = string("gather_25_batch_dims_0"), val = int32(0)];
+            bool gather_25_validate_indices_0 = const()[name = string("gather_25_validate_indices_0"), val = bool(false)];
+            string var_551_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_551_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_25_to_uint16 = const()[name = string("select_25_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_551_shape_cast_fp16_to_uint16 = cast(dtype = var_551_shape_cast_fp16_to_uint16_dtype_0, x = var_551_shape_cast_fp16)[name = string("cast_149")];
+            uint16 gather_25_cast_uint16 = gather(axis = gather_25_axis_0, batch_dims = gather_25_batch_dims_0, indices = select_25_to_uint16, validate_indices = gather_25_validate_indices_0, x = var_551_shape_cast_fp16_to_uint16)[name = string("gather_25_cast_uint16")];
+            string gather_25_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_25_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_111_axes_0 = const()[name = string("expand_dims_111_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_25_cast_uint16_to_int32 = cast(dtype = gather_25_cast_uint16_to_int32_dtype_0, x = gather_25_cast_uint16)[name = string("cast_148")];
+            tensor<int32, [1]> expand_dims_111 = expand_dims(axes = expand_dims_111_axes_0, x = gather_25_cast_uint16_to_int32)[name = string("expand_dims_111")];
+            tensor<int32, [4]> concat_80 = const()[name = string("concat_80"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [1]> concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_81_values3_0 = const()[name = string("concat_81_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
+            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, concat_81_values1_0, expand_dims_111, concat_81_values3_0))[name = string("concat_81")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_80, begin_mask = v_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_81, end_mask = v_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_13_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_91)[name = string("v_cache2_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_13_cast_fp16, input = v_cache2)[name = string("coreml_update_state_93_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_93 = read_state(input = v_cache2)[name = string("coreml_update_state_93")];
+            tensor<fp16, [1280, 1280]> var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121935488)))];
+            tensor<fp16, [1, ?, 1280]> linear_26_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_573_to_fp16, x = audio_data)[name = string("linear_26_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_577_to_fp16 = const()[name = string("op_577_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125212352)))];
+            tensor<fp16, [1280]> var_578_to_fp16 = const()[name = string("op_578_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128489216)))];
+            tensor<fp16, [1, ?, 1280]> linear_27_cast_fp16 = linear(bias = var_578_to_fp16, weight = var_577_to_fp16, x = audio_data)[name = string("linear_27_cast_fp16")];
+            tensor<int32, [3]> var_580_shape_cast_fp16 = shape(x = linear_26_cast_fp16)[name = string("op_580_shape_cast_fp16")];
+            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
+            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
+            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
+            string var_580_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_580_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_580_shape_cast_fp16_to_uint16 = cast(dtype = var_580_shape_cast_fp16_to_uint16_dtype_0, x = var_580_shape_cast_fp16)[name = string("cast_147")];
+            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_580_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
+            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_146")];
+            tensor<int32, [1]> expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = gather_26_cast_uint16_to_int32)[name = string("expand_dims_115")];
+            tensor<int32, [4]> concat_83 = const()[name = string("concat_83"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [1]> concat_84_values0_0 = const()[name = string("concat_84_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)];
+            bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (concat_84_values0_0, concat_84_values1_0, expand_dims_115, concat_84_values3_0))[name = string("concat_84")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_83, begin_mask = k_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_84, end_mask = k_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_14_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_92)[name = string("k_cache2_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_14_cast_fp16, input = k_cache2)[name = string("coreml_update_state_94_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_94 = read_state(input = k_cache2)[name = string("coreml_update_state_94")];
+            tensor<int32, [3]> var_585_shape_cast_fp16 = shape(x = linear_27_cast_fp16)[name = string("op_585_shape_cast_fp16")];
+            int32 gather_27_axis_0 = const()[name = string("gather_27_axis_0"), val = int32(0)];
+            int32 gather_27_batch_dims_0 = const()[name = string("gather_27_batch_dims_0"), val = int32(0)];
+            bool gather_27_validate_indices_0 = const()[name = string("gather_27_validate_indices_0"), val = bool(false)];
+            string var_585_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_585_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_27_to_uint16 = const()[name = string("select_27_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_585_shape_cast_fp16_to_uint16 = cast(dtype = var_585_shape_cast_fp16_to_uint16_dtype_0, x = var_585_shape_cast_fp16)[name = string("cast_145")];
+            uint16 gather_27_cast_uint16 = gather(axis = gather_27_axis_0, batch_dims = gather_27_batch_dims_0, indices = select_27_to_uint16, validate_indices = gather_27_validate_indices_0, x = var_585_shape_cast_fp16_to_uint16)[name = string("gather_27_cast_uint16")];
+            string gather_27_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_27_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_119_axes_0 = const()[name = string("expand_dims_119_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_27_cast_uint16_to_int32 = cast(dtype = gather_27_cast_uint16_to_int32_dtype_0, x = gather_27_cast_uint16)[name = string("cast_144")];
+            tensor<int32, [1]> expand_dims_119 = expand_dims(axes = expand_dims_119_axes_0, x = gather_27_cast_uint16_to_int32)[name = string("expand_dims_119")];
+            tensor<int32, [4]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [1]> concat_87_values0_0 = const()[name = string("concat_87_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)];
+            bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (concat_87_values0_0, concat_87_values1_0, expand_dims_119, concat_87_values3_0))[name = string("concat_87")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_87, end_mask = v_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_14_stride_0, update = linear_27_cast_fp16, x = coreml_update_state_93)[name = string("v_cache2_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_14_cast_fp16, input = v_cache2)[name = string("coreml_update_state_95_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_95 = read_state(input = v_cache2)[name = string("coreml_update_state_95")];
+            tensor<fp16, [1280, 1280]> var_607_to_fp16 = const()[name = string("op_607_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128491840)))];
+            tensor<fp16, [1, ?, 1280]> linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_607_to_fp16, x = audio_data)[name = string("linear_28_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_611_to_fp16 = const()[name = string("op_611_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131768704)))];
+            tensor<fp16, [1280]> var_612_to_fp16 = const()[name = string("op_612_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135045568)))];
+            tensor<fp16, [1, ?, 1280]> linear_29_cast_fp16 = linear(bias = var_612_to_fp16, weight = var_611_to_fp16, x = audio_data)[name = string("linear_29_cast_fp16")];
+            tensor<int32, [3]> var_614_shape_cast_fp16 = shape(x = linear_28_cast_fp16)[name = string("op_614_shape_cast_fp16")];
+            int32 gather_28_axis_0 = const()[name = string("gather_28_axis_0"), val = int32(0)];
+            int32 gather_28_batch_dims_0 = const()[name = string("gather_28_batch_dims_0"), val = int32(0)];
+            bool gather_28_validate_indices_0 = const()[name = string("gather_28_validate_indices_0"), val = bool(false)];
+            string var_614_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_614_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_28_to_uint16 = const()[name = string("select_28_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_614_shape_cast_fp16_to_uint16 = cast(dtype = var_614_shape_cast_fp16_to_uint16_dtype_0, x = var_614_shape_cast_fp16)[name = string("cast_143")];
+            uint16 gather_28_cast_uint16 = gather(axis = gather_28_axis_0, batch_dims = gather_28_batch_dims_0, indices = select_28_to_uint16, validate_indices = gather_28_validate_indices_0, x = var_614_shape_cast_fp16_to_uint16)[name = string("gather_28_cast_uint16")];
+            string gather_28_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_28_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_123_axes_0 = const()[name = string("expand_dims_123_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_28_cast_uint16_to_int32 = cast(dtype = gather_28_cast_uint16_to_int32_dtype_0, x = gather_28_cast_uint16)[name = string("cast_142")];
+            tensor<int32, [1]> expand_dims_123 = expand_dims(axes = expand_dims_123_axes_0, x = gather_28_cast_uint16_to_int32)[name = string("expand_dims_123")];
+            tensor<int32, [4]> concat_89 = const()[name = string("concat_89"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [1]> concat_90_values0_0 = const()[name = string("concat_90_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_90_values1_0 = const()[name = string("concat_90_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_90_values3_0 = const()[name = string("concat_90_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)];
+            bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (concat_90_values0_0, concat_90_values1_0, expand_dims_123, concat_90_values3_0))[name = string("concat_90")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_89, begin_mask = k_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_90, end_mask = k_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_15_stride_0, update = linear_28_cast_fp16, x = coreml_update_state_94)[name = string("k_cache2_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_15_cast_fp16, input = k_cache2)[name = string("coreml_update_state_96_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_96 = read_state(input = k_cache2)[name = string("coreml_update_state_96")];
+            tensor<int32, [3]> var_619_shape_cast_fp16 = shape(x = linear_29_cast_fp16)[name = string("op_619_shape_cast_fp16")];
+            int32 gather_29_axis_0 = const()[name = string("gather_29_axis_0"), val = int32(0)];
+            int32 gather_29_batch_dims_0 = const()[name = string("gather_29_batch_dims_0"), val = int32(0)];
+            bool gather_29_validate_indices_0 = const()[name = string("gather_29_validate_indices_0"), val = bool(false)];
+            string var_619_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_619_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_29_to_uint16 = const()[name = string("select_29_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_619_shape_cast_fp16_to_uint16 = cast(dtype = var_619_shape_cast_fp16_to_uint16_dtype_0, x = var_619_shape_cast_fp16)[name = string("cast_141")];
+            uint16 gather_29_cast_uint16 = gather(axis = gather_29_axis_0, batch_dims = gather_29_batch_dims_0, indices = select_29_to_uint16, validate_indices = gather_29_validate_indices_0, x = var_619_shape_cast_fp16_to_uint16)[name = string("gather_29_cast_uint16")];
+            string gather_29_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_29_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_127_axes_0 = const()[name = string("expand_dims_127_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_29_cast_uint16_to_int32 = cast(dtype = gather_29_cast_uint16_to_int32_dtype_0, x = gather_29_cast_uint16)[name = string("cast_140")];
+            tensor<int32, [1]> expand_dims_127 = expand_dims(axes = expand_dims_127_axes_0, x = gather_29_cast_uint16_to_int32)[name = string("expand_dims_127")];
+            tensor<int32, [4]> concat_92 = const()[name = string("concat_92"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [1]> concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
+            bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_127, concat_93_values3_0))[name = string("concat_93")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_93, end_mask = v_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_15_stride_0, update = linear_29_cast_fp16, x = coreml_update_state_95)[name = string("v_cache2_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_15_cast_fp16, input = v_cache2)[name = string("coreml_update_state_97_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_97 = read_state(input = v_cache2)[name = string("coreml_update_state_97")];
+            tensor<fp16, [1280, 1280]> var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135048192)))];
+            tensor<fp16, [1, ?, 1280]> linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_641_to_fp16, x = audio_data)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_645_to_fp16 = const()[name = string("op_645_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138325056)))];
+            tensor<fp16, [1280]> var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141601920)))];
+            tensor<fp16, [1, ?, 1280]> linear_31_cast_fp16 = linear(bias = var_646_to_fp16, weight = var_645_to_fp16, x = audio_data)[name = string("linear_31_cast_fp16")];
+            tensor<int32, [3]> var_648_shape_cast_fp16 = shape(x = linear_30_cast_fp16)[name = string("op_648_shape_cast_fp16")];
+            int32 gather_30_axis_0 = const()[name = string("gather_30_axis_0"), val = int32(0)];
+            int32 gather_30_batch_dims_0 = const()[name = string("gather_30_batch_dims_0"), val = int32(0)];
+            bool gather_30_validate_indices_0 = const()[name = string("gather_30_validate_indices_0"), val = bool(false)];
+            string var_648_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_648_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_30_to_uint16 = const()[name = string("select_30_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_648_shape_cast_fp16_to_uint16 = cast(dtype = var_648_shape_cast_fp16_to_uint16_dtype_0, x = var_648_shape_cast_fp16)[name = string("cast_139")];
+            uint16 gather_30_cast_uint16 = gather(axis = gather_30_axis_0, batch_dims = gather_30_batch_dims_0, indices = select_30_to_uint16, validate_indices = gather_30_validate_indices_0, x = var_648_shape_cast_fp16_to_uint16)[name = string("gather_30_cast_uint16")];
+            string gather_30_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_30_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_30_cast_uint16_to_int32 = cast(dtype = gather_30_cast_uint16_to_int32_dtype_0, x = gather_30_cast_uint16)[name = string("cast_138")];
+            tensor<int32, [1]> expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = gather_30_cast_uint16_to_int32)[name = string("expand_dims_131")];
+            tensor<int32, [4]> concat_95 = const()[name = string("concat_95"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [1]> concat_96_values0_0 = const()[name = string("concat_96_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_96_values1_0 = const()[name = string("concat_96_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_96_values3_0 = const()[name = string("concat_96_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)];
+            bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (concat_96_values0_0, concat_96_values1_0, expand_dims_131, concat_96_values3_0))[name = string("concat_96")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_95, begin_mask = k_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_96, end_mask = k_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_16_stride_0, update = linear_30_cast_fp16, x = coreml_update_state_96)[name = string("k_cache2_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_16_cast_fp16, input = k_cache2)[name = string("coreml_update_state_98_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_98 = read_state(input = k_cache2)[name = string("coreml_update_state_98")];
+            tensor<int32, [3]> var_653_shape_cast_fp16 = shape(x = linear_31_cast_fp16)[name = string("op_653_shape_cast_fp16")];
+            int32 gather_31_axis_0 = const()[name = string("gather_31_axis_0"), val = int32(0)];
+            int32 gather_31_batch_dims_0 = const()[name = string("gather_31_batch_dims_0"), val = int32(0)];
+            bool gather_31_validate_indices_0 = const()[name = string("gather_31_validate_indices_0"), val = bool(false)];
+            string var_653_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_653_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_31_to_uint16 = const()[name = string("select_31_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_653_shape_cast_fp16_to_uint16 = cast(dtype = var_653_shape_cast_fp16_to_uint16_dtype_0, x = var_653_shape_cast_fp16)[name = string("cast_137")];
+            uint16 gather_31_cast_uint16 = gather(axis = gather_31_axis_0, batch_dims = gather_31_batch_dims_0, indices = select_31_to_uint16, validate_indices = gather_31_validate_indices_0, x = var_653_shape_cast_fp16_to_uint16)[name = string("gather_31_cast_uint16")];
+            string gather_31_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_31_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_135_axes_0 = const()[name = string("expand_dims_135_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_31_cast_uint16_to_int32 = cast(dtype = gather_31_cast_uint16_to_int32_dtype_0, x = gather_31_cast_uint16)[name = string("cast_136")];
+            tensor<int32, [1]> expand_dims_135 = expand_dims(axes = expand_dims_135_axes_0, x = gather_31_cast_uint16_to_int32)[name = string("expand_dims_135")];
+            tensor<int32, [4]> concat_98 = const()[name = string("concat_98"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [1]> concat_99_values0_0 = const()[name = string("concat_99_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)];
+            bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (concat_99_values0_0, concat_99_values1_0, expand_dims_135, concat_99_values3_0))[name = string("concat_99")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_98, begin_mask = v_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_99, end_mask = v_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_16_stride_0, update = linear_31_cast_fp16, x = coreml_update_state_97)[name = string("v_cache2_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_16_cast_fp16, input = v_cache2)[name = string("coreml_update_state_99_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_99 = read_state(input = v_cache2)[name = string("coreml_update_state_99")];
+            tensor<fp16, [1280, 1280]> var_675_to_fp16 = const()[name = string("op_675_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141604544)))];
+            tensor<fp16, [1, ?, 1280]> linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_675_to_fp16, x = audio_data)[name = string("linear_32_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_679_to_fp16 = const()[name = string("op_679_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144881408)))];
+            tensor<fp16, [1280]> var_680_to_fp16 = const()[name = string("op_680_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148158272)))];
+            tensor<fp16, [1, ?, 1280]> linear_33_cast_fp16 = linear(bias = var_680_to_fp16, weight = var_679_to_fp16, x = audio_data)[name = string("linear_33_cast_fp16")];
+            tensor<int32, [3]> var_682_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_682_shape_cast_fp16")];
+            int32 gather_32_axis_0 = const()[name = string("gather_32_axis_0"), val = int32(0)];
+            int32 gather_32_batch_dims_0 = const()[name = string("gather_32_batch_dims_0"), val = int32(0)];
+            bool gather_32_validate_indices_0 = const()[name = string("gather_32_validate_indices_0"), val = bool(false)];
+            string var_682_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_682_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_32_to_uint16 = const()[name = string("select_32_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_682_shape_cast_fp16_to_uint16 = cast(dtype = var_682_shape_cast_fp16_to_uint16_dtype_0, x = var_682_shape_cast_fp16)[name = string("cast_135")];
+            uint16 gather_32_cast_uint16 = gather(axis = gather_32_axis_0, batch_dims = gather_32_batch_dims_0, indices = select_32_to_uint16, validate_indices = gather_32_validate_indices_0, x = var_682_shape_cast_fp16_to_uint16)[name = string("gather_32_cast_uint16")];
+            string gather_32_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_32_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_139_axes_0 = const()[name = string("expand_dims_139_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_32_cast_uint16_to_int32 = cast(dtype = gather_32_cast_uint16_to_int32_dtype_0, x = gather_32_cast_uint16)[name = string("cast_134")];
+            tensor<int32, [1]> expand_dims_139 = expand_dims(axes = expand_dims_139_axes_0, x = gather_32_cast_uint16_to_int32)[name = string("expand_dims_139")];
+            tensor<int32, [4]> concat_101 = const()[name = string("concat_101"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [1]> concat_102_values0_0 = const()[name = string("concat_102_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_102_values1_0 = const()[name = string("concat_102_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_102_values3_0 = const()[name = string("concat_102_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)];
+            bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (concat_102_values0_0, concat_102_values1_0, expand_dims_139, concat_102_values3_0))[name = string("concat_102")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_101, begin_mask = k_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_102, end_mask = k_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_17_stride_0, update = linear_32_cast_fp16, x = coreml_update_state_98)[name = string("k_cache2_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_17_cast_fp16, input = k_cache2)[name = string("coreml_update_state_100_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_100 = read_state(input = k_cache2)[name = string("coreml_update_state_100")];
+            tensor<int32, [3]> var_687_shape_cast_fp16 = shape(x = linear_33_cast_fp16)[name = string("op_687_shape_cast_fp16")];
+            int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)];
+            int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)];
+            bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)];
+            string var_687_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_687_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_687_shape_cast_fp16_to_uint16 = cast(dtype = var_687_shape_cast_fp16_to_uint16_dtype_0, x = var_687_shape_cast_fp16)[name = string("cast_133")];
+            uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_687_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")];
+            string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_143_axes_0 = const()[name = string("expand_dims_143_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_132")];
+            tensor<int32, [1]> expand_dims_143 = expand_dims(axes = expand_dims_143_axes_0, x = gather_33_cast_uint16_to_int32)[name = string("expand_dims_143")];
+            tensor<int32, [4]> concat_104 = const()[name = string("concat_104"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [1]> concat_105_values0_0 = const()[name = string("concat_105_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_105_values1_0 = const()[name = string("concat_105_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_105_values3_0 = const()[name = string("concat_105_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_105_axis_0 = const()[name = string("concat_105_axis_0"), val = int32(0)];
+            bool concat_105_interleave_0 = const()[name = string("concat_105_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_105 = concat(axis = concat_105_axis_0, interleave = concat_105_interleave_0, values = (concat_105_values0_0, concat_105_values1_0, expand_dims_143, concat_105_values3_0))[name = string("concat_105")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_104, begin_mask = v_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_105, end_mask = v_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_17_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_99)[name = string("v_cache2_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_17_cast_fp16, input = v_cache2)[name = string("coreml_update_state_101_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_101 = read_state(input = v_cache2)[name = string("coreml_update_state_101")];
+            tensor<fp16, [1280, 1280]> var_709_to_fp16 = const()[name = string("op_709_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148160896)))];
+            tensor<fp16, [1, ?, 1280]> linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_709_to_fp16, x = audio_data)[name = string("linear_34_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_713_to_fp16 = const()[name = string("op_713_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151437760)))];
+            tensor<fp16, [1280]> var_714_to_fp16 = const()[name = string("op_714_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154714624)))];
+            tensor<fp16, [1, ?, 1280]> linear_35_cast_fp16 = linear(bias = var_714_to_fp16, weight = var_713_to_fp16, x = audio_data)[name = string("linear_35_cast_fp16")];
+            tensor<int32, [3]> var_716_shape_cast_fp16 = shape(x = linear_34_cast_fp16)[name = string("op_716_shape_cast_fp16")];
+            int32 gather_34_axis_0 = const()[name = string("gather_34_axis_0"), val = int32(0)];
+            int32 gather_34_batch_dims_0 = const()[name = string("gather_34_batch_dims_0"), val = int32(0)];
+            bool gather_34_validate_indices_0 = const()[name = string("gather_34_validate_indices_0"), val = bool(false)];
+            string var_716_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_716_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_34_to_uint16 = const()[name = string("select_34_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_716_shape_cast_fp16_to_uint16 = cast(dtype = var_716_shape_cast_fp16_to_uint16_dtype_0, x = var_716_shape_cast_fp16)[name = string("cast_131")];
+            uint16 gather_34_cast_uint16 = gather(axis = gather_34_axis_0, batch_dims = gather_34_batch_dims_0, indices = select_34_to_uint16, validate_indices = gather_34_validate_indices_0, x = var_716_shape_cast_fp16_to_uint16)[name = string("gather_34_cast_uint16")];
+            string gather_34_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_34_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_34_cast_uint16_to_int32 = cast(dtype = gather_34_cast_uint16_to_int32_dtype_0, x = gather_34_cast_uint16)[name = string("cast_130")];
+            tensor<int32, [1]> expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = gather_34_cast_uint16_to_int32)[name = string("expand_dims_147")];
+            tensor<int32, [4]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [1]> concat_108_values0_0 = const()[name = string("concat_108_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)];
+            bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (concat_108_values0_0, concat_108_values1_0, expand_dims_147, concat_108_values3_0))[name = string("concat_108")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_107, begin_mask = k_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_108, end_mask = k_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_18_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_100)[name = string("k_cache2_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_18_cast_fp16, input = k_cache2)[name = string("coreml_update_state_102_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_102 = read_state(input = k_cache2)[name = string("coreml_update_state_102")];
+            tensor<int32, [3]> var_721_shape_cast_fp16 = shape(x = linear_35_cast_fp16)[name = string("op_721_shape_cast_fp16")];
+            int32 gather_35_axis_0 = const()[name = string("gather_35_axis_0"), val = int32(0)];
+            int32 gather_35_batch_dims_0 = const()[name = string("gather_35_batch_dims_0"), val = int32(0)];
+            bool gather_35_validate_indices_0 = const()[name = string("gather_35_validate_indices_0"), val = bool(false)];
+            string var_721_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_721_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_35_to_uint16 = const()[name = string("select_35_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_721_shape_cast_fp16_to_uint16 = cast(dtype = var_721_shape_cast_fp16_to_uint16_dtype_0, x = var_721_shape_cast_fp16)[name = string("cast_129")];
+            uint16 gather_35_cast_uint16 = gather(axis = gather_35_axis_0, batch_dims = gather_35_batch_dims_0, indices = select_35_to_uint16, validate_indices = gather_35_validate_indices_0, x = var_721_shape_cast_fp16_to_uint16)[name = string("gather_35_cast_uint16")];
+            string gather_35_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_35_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_151_axes_0 = const()[name = string("expand_dims_151_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_35_cast_uint16_to_int32 = cast(dtype = gather_35_cast_uint16_to_int32_dtype_0, x = gather_35_cast_uint16)[name = string("cast_128")];
+            tensor<int32, [1]> expand_dims_151 = expand_dims(axes = expand_dims_151_axes_0, x = gather_35_cast_uint16_to_int32)[name = string("expand_dims_151")];
+            tensor<int32, [4]> concat_110 = const()[name = string("concat_110"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [1]> concat_111_values0_0 = const()[name = string("concat_111_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)];
+            bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (concat_111_values0_0, concat_111_values1_0, expand_dims_151, concat_111_values3_0))[name = string("concat_111")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_110, begin_mask = v_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_111, end_mask = v_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_18_stride_0, update = linear_35_cast_fp16, x = coreml_update_state_101)[name = string("v_cache2_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_18_cast_fp16, input = v_cache2)[name = string("coreml_update_state_103_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_103 = read_state(input = v_cache2)[name = string("coreml_update_state_103")];
+            tensor<fp16, [1280, 1280]> var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154717248)))];
+            tensor<fp16, [1, ?, 1280]> linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_743_to_fp16, x = audio_data)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157994112)))];
+            tensor<fp16, [1280]> var_748_to_fp16 = const()[name = string("op_748_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161270976)))];
+            tensor<fp16, [1, ?, 1280]> linear_37_cast_fp16 = linear(bias = var_748_to_fp16, weight = var_747_to_fp16, x = audio_data)[name = string("linear_37_cast_fp16")];
+            tensor<int32, [3]> var_750_shape_cast_fp16 = shape(x = linear_36_cast_fp16)[name = string("op_750_shape_cast_fp16")];
+            int32 gather_36_axis_0 = const()[name = string("gather_36_axis_0"), val = int32(0)];
+            int32 gather_36_batch_dims_0 = const()[name = string("gather_36_batch_dims_0"), val = int32(0)];
+            bool gather_36_validate_indices_0 = const()[name = string("gather_36_validate_indices_0"), val = bool(false)];
+            string var_750_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_750_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_36_to_uint16 = const()[name = string("select_36_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_750_shape_cast_fp16_to_uint16 = cast(dtype = var_750_shape_cast_fp16_to_uint16_dtype_0, x = var_750_shape_cast_fp16)[name = string("cast_127")];
+            uint16 gather_36_cast_uint16 = gather(axis = gather_36_axis_0, batch_dims = gather_36_batch_dims_0, indices = select_36_to_uint16, validate_indices = gather_36_validate_indices_0, x = var_750_shape_cast_fp16_to_uint16)[name = string("gather_36_cast_uint16")];
+            string gather_36_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_36_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_155_axes_0 = const()[name = string("expand_dims_155_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_36_cast_uint16_to_int32 = cast(dtype = gather_36_cast_uint16_to_int32_dtype_0, x = gather_36_cast_uint16)[name = string("cast_126")];
+            tensor<int32, [1]> expand_dims_155 = expand_dims(axes = expand_dims_155_axes_0, x = gather_36_cast_uint16_to_int32)[name = string("expand_dims_155")];
+            tensor<int32, [4]> concat_113 = const()[name = string("concat_113"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [1]> concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_114_values1_0 = const()[name = string("concat_114_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_114_values3_0 = const()[name = string("concat_114_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
+            bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, concat_114_values1_0, expand_dims_155, concat_114_values3_0))[name = string("concat_114")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_113, begin_mask = k_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_114, end_mask = k_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_19_stride_0, update = linear_36_cast_fp16, x = coreml_update_state_102)[name = string("k_cache2_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_19_cast_fp16, input = k_cache2)[name = string("coreml_update_state_104_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_104 = read_state(input = k_cache2)[name = string("coreml_update_state_104")];
+            tensor<int32, [3]> var_755_shape_cast_fp16 = shape(x = linear_37_cast_fp16)[name = string("op_755_shape_cast_fp16")];
+            int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)];
+            int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)];
+            bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)];
+            string var_755_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_755_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_755_shape_cast_fp16_to_uint16 = cast(dtype = var_755_shape_cast_fp16_to_uint16_dtype_0, x = var_755_shape_cast_fp16)[name = string("cast_125")];
+            uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_755_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")];
+            string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_159_axes_0 = const()[name = string("expand_dims_159_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_124")];
+            tensor<int32, [1]> expand_dims_159 = expand_dims(axes = expand_dims_159_axes_0, x = gather_37_cast_uint16_to_int32)[name = string("expand_dims_159")];
+            tensor<int32, [4]> concat_116 = const()[name = string("concat_116"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [1]> concat_117_values0_0 = const()[name = string("concat_117_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_117_values1_0 = const()[name = string("concat_117_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_117_values3_0 = const()[name = string("concat_117_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_117_axis_0 = const()[name = string("concat_117_axis_0"), val = int32(0)];
+            bool concat_117_interleave_0 = const()[name = string("concat_117_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_117 = concat(axis = concat_117_axis_0, interleave = concat_117_interleave_0, values = (concat_117_values0_0, concat_117_values1_0, expand_dims_159, concat_117_values3_0))[name = string("concat_117")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_116, begin_mask = v_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_117, end_mask = v_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_19_stride_0, update = linear_37_cast_fp16, x = coreml_update_state_103)[name = string("v_cache2_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_19_cast_fp16, input = v_cache2)[name = string("coreml_update_state_105_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_105 = read_state(input = v_cache2)[name = string("coreml_update_state_105")];
+            tensor<fp16, [1280, 1280]> var_777_to_fp16 = const()[name = string("op_777_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161273600)))];
+            tensor<fp16, [1, ?, 1280]> linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_777_to_fp16, x = audio_data)[name = string("linear_38_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_781_to_fp16 = const()[name = string("op_781_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164550464)))];
+            tensor<fp16, [1280]> var_782_to_fp16 = const()[name = string("op_782_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167827328)))];
+            tensor<fp16, [1, ?, 1280]> linear_39_cast_fp16 = linear(bias = var_782_to_fp16, weight = var_781_to_fp16, x = audio_data)[name = string("linear_39_cast_fp16")];
+            tensor<int32, [3]> var_784_shape_cast_fp16 = shape(x = linear_38_cast_fp16)[name = string("op_784_shape_cast_fp16")];
+            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
+            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
+            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
+            string var_784_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_784_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_784_shape_cast_fp16_to_uint16 = cast(dtype = var_784_shape_cast_fp16_to_uint16_dtype_0, x = var_784_shape_cast_fp16)[name = string("cast_123")];
+            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_784_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
+            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_122")];
+            tensor<int32, [1]> expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = gather_38_cast_uint16_to_int32)[name = string("expand_dims_163")];
+            tensor<int32, [4]> concat_119 = const()[name = string("concat_119"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [1]> concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_120_values1_0 = const()[name = string("concat_120_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_120_values3_0 = const()[name = string("concat_120_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)];
+            bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, concat_120_values1_0, expand_dims_163, concat_120_values3_0))[name = string("concat_120")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_119, begin_mask = k_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_120, end_mask = k_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_20_stride_0, update = linear_38_cast_fp16, x = coreml_update_state_104)[name = string("k_cache2_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_20_cast_fp16, input = k_cache2)[name = string("coreml_update_state_106_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_106 = read_state(input = k_cache2)[name = string("coreml_update_state_106")];
+            tensor<int32, [3]> var_789_shape_cast_fp16 = shape(x = linear_39_cast_fp16)[name = string("op_789_shape_cast_fp16")];
+            int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)];
+            int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)];
+            bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)];
+            string var_789_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_789_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_789_shape_cast_fp16_to_uint16 = cast(dtype = var_789_shape_cast_fp16_to_uint16_dtype_0, x = var_789_shape_cast_fp16)[name = string("cast_121")];
+            uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_789_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")];
+            string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_167_axes_0 = const()[name = string("expand_dims_167_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_120")];
+            tensor<int32, [1]> expand_dims_167 = expand_dims(axes = expand_dims_167_axes_0, x = gather_39_cast_uint16_to_int32)[name = string("expand_dims_167")];
+            tensor<int32, [4]> concat_122 = const()[name = string("concat_122"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [1]> concat_123_values0_0 = const()[name = string("concat_123_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)];
+            bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (concat_123_values0_0, concat_123_values1_0, expand_dims_167, concat_123_values3_0))[name = string("concat_123")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_122, begin_mask = v_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_123, end_mask = v_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_20_stride_0, update = linear_39_cast_fp16, x = coreml_update_state_105)[name = string("v_cache2_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_20_cast_fp16, input = v_cache2)[name = string("coreml_update_state_107_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_107 = read_state(input = v_cache2)[name = string("coreml_update_state_107")];
+            tensor<fp16, [1280, 1280]> var_811_to_fp16 = const()[name = string("op_811_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167829952)))];
+            tensor<fp16, [1, ?, 1280]> linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_811_to_fp16, x = audio_data)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_815_to_fp16 = const()[name = string("op_815_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171106816)))];
+            tensor<fp16, [1280]> var_816_to_fp16 = const()[name = string("op_816_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174383680)))];
+            tensor<fp16, [1, ?, 1280]> linear_41_cast_fp16 = linear(bias = var_816_to_fp16, weight = var_815_to_fp16, x = audio_data)[name = string("linear_41_cast_fp16")];
+            tensor<int32, [3]> var_818_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_818_shape_cast_fp16")];
+            int32 gather_40_axis_0 = const()[name = string("gather_40_axis_0"), val = int32(0)];
+            int32 gather_40_batch_dims_0 = const()[name = string("gather_40_batch_dims_0"), val = int32(0)];
+            bool gather_40_validate_indices_0 = const()[name = string("gather_40_validate_indices_0"), val = bool(false)];
+            string var_818_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_818_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_40_to_uint16 = const()[name = string("select_40_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_818_shape_cast_fp16_to_uint16 = cast(dtype = var_818_shape_cast_fp16_to_uint16_dtype_0, x = var_818_shape_cast_fp16)[name = string("cast_119")];
+            uint16 gather_40_cast_uint16 = gather(axis = gather_40_axis_0, batch_dims = gather_40_batch_dims_0, indices = select_40_to_uint16, validate_indices = gather_40_validate_indices_0, x = var_818_shape_cast_fp16_to_uint16)[name = string("gather_40_cast_uint16")];
+            string gather_40_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_40_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_171_axes_0 = const()[name = string("expand_dims_171_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_40_cast_uint16_to_int32 = cast(dtype = gather_40_cast_uint16_to_int32_dtype_0, x = gather_40_cast_uint16)[name = string("cast_118")];
+            tensor<int32, [1]> expand_dims_171 = expand_dims(axes = expand_dims_171_axes_0, x = gather_40_cast_uint16_to_int32)[name = string("expand_dims_171")];
+            tensor<int32, [4]> concat_125 = const()[name = string("concat_125"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [1]> concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_126_values1_0 = const()[name = string("concat_126_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_126_values3_0 = const()[name = string("concat_126_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)];
+            bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, concat_126_values1_0, expand_dims_171, concat_126_values3_0))[name = string("concat_126")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_125, begin_mask = k_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_126, end_mask = k_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_21_stride_0, update = linear_40_cast_fp16, x = coreml_update_state_106)[name = string("k_cache2_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_21_cast_fp16, input = k_cache2)[name = string("coreml_update_state_108_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_108 = read_state(input = k_cache2)[name = string("coreml_update_state_108")];
+            tensor<int32, [3]> var_823_shape_cast_fp16 = shape(x = linear_41_cast_fp16)[name = string("op_823_shape_cast_fp16")];
+            int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)];
+            int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)];
+            bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)];
+            string var_823_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_823_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_823_shape_cast_fp16_to_uint16 = cast(dtype = var_823_shape_cast_fp16_to_uint16_dtype_0, x = var_823_shape_cast_fp16)[name = string("cast_117")];
+            uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_823_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")];
+            string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_175_axes_0 = const()[name = string("expand_dims_175_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_116")];
+            tensor<int32, [1]> expand_dims_175 = expand_dims(axes = expand_dims_175_axes_0, x = gather_41_cast_uint16_to_int32)[name = string("expand_dims_175")];
+            tensor<int32, [4]> concat_128 = const()[name = string("concat_128"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [1]> concat_129_values0_0 = const()[name = string("concat_129_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)];
+            bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (concat_129_values0_0, concat_129_values1_0, expand_dims_175, concat_129_values3_0))[name = string("concat_129")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_128, begin_mask = v_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_129, end_mask = v_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_21_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_107)[name = string("v_cache2_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_21_cast_fp16, input = v_cache2)[name = string("coreml_update_state_109_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_109 = read_state(input = v_cache2)[name = string("coreml_update_state_109")];
+            tensor<fp16, [1280, 1280]> var_845_to_fp16 = const()[name = string("op_845_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174386304)))];
+            tensor<fp16, [1, ?, 1280]> linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_845_to_fp16, x = audio_data)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177663168)))];
+            tensor<fp16, [1280]> var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180940032)))];
+            tensor<fp16, [1, ?, 1280]> linear_43_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = audio_data)[name = string("linear_43_cast_fp16")];
+            tensor<int32, [3]> var_852_shape_cast_fp16 = shape(x = linear_42_cast_fp16)[name = string("op_852_shape_cast_fp16")];
+            int32 gather_42_axis_0 = const()[name = string("gather_42_axis_0"), val = int32(0)];
+            int32 gather_42_batch_dims_0 = const()[name = string("gather_42_batch_dims_0"), val = int32(0)];
+            bool gather_42_validate_indices_0 = const()[name = string("gather_42_validate_indices_0"), val = bool(false)];
+            string var_852_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_852_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_42_to_uint16 = const()[name = string("select_42_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_852_shape_cast_fp16_to_uint16 = cast(dtype = var_852_shape_cast_fp16_to_uint16_dtype_0, x = var_852_shape_cast_fp16)[name = string("cast_115")];
+            uint16 gather_42_cast_uint16 = gather(axis = gather_42_axis_0, batch_dims = gather_42_batch_dims_0, indices = select_42_to_uint16, validate_indices = gather_42_validate_indices_0, x = var_852_shape_cast_fp16_to_uint16)[name = string("gather_42_cast_uint16")];
+            string gather_42_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_42_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_42_cast_uint16_to_int32 = cast(dtype = gather_42_cast_uint16_to_int32_dtype_0, x = gather_42_cast_uint16)[name = string("cast_114")];
+            tensor<int32, [1]> expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = gather_42_cast_uint16_to_int32)[name = string("expand_dims_179")];
+            tensor<int32, [4]> concat_131 = const()[name = string("concat_131"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [1]> concat_132_values0_0 = const()[name = string("concat_132_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)];
+            bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (concat_132_values0_0, concat_132_values1_0, expand_dims_179, concat_132_values3_0))[name = string("concat_132")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_131, begin_mask = k_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_132, end_mask = k_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_22_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_108)[name = string("k_cache2_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_22_cast_fp16, input = k_cache2)[name = string("coreml_update_state_110_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_110 = read_state(input = k_cache2)[name = string("coreml_update_state_110")];
+            tensor<int32, [3]> var_857_shape_cast_fp16 = shape(x = linear_43_cast_fp16)[name = string("op_857_shape_cast_fp16")];
+            int32 gather_43_axis_0 = const()[name = string("gather_43_axis_0"), val = int32(0)];
+            int32 gather_43_batch_dims_0 = const()[name = string("gather_43_batch_dims_0"), val = int32(0)];
+            bool gather_43_validate_indices_0 = const()[name = string("gather_43_validate_indices_0"), val = bool(false)];
+            string var_857_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_857_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_43_to_uint16 = const()[name = string("select_43_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_857_shape_cast_fp16_to_uint16 = cast(dtype = var_857_shape_cast_fp16_to_uint16_dtype_0, x = var_857_shape_cast_fp16)[name = string("cast_113")];
+            uint16 gather_43_cast_uint16 = gather(axis = gather_43_axis_0, batch_dims = gather_43_batch_dims_0, indices = select_43_to_uint16, validate_indices = gather_43_validate_indices_0, x = var_857_shape_cast_fp16_to_uint16)[name = string("gather_43_cast_uint16")];
+            string gather_43_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_43_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_183_axes_0 = const()[name = string("expand_dims_183_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_43_cast_uint16_to_int32 = cast(dtype = gather_43_cast_uint16_to_int32_dtype_0, x = gather_43_cast_uint16)[name = string("cast_112")];
+            tensor<int32, [1]> expand_dims_183 = expand_dims(axes = expand_dims_183_axes_0, x = gather_43_cast_uint16_to_int32)[name = string("expand_dims_183")];
+            tensor<int32, [4]> concat_134 = const()[name = string("concat_134"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [1]> concat_135_values0_0 = const()[name = string("concat_135_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)];
+            bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (concat_135_values0_0, concat_135_values1_0, expand_dims_183, concat_135_values3_0))[name = string("concat_135")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_134, begin_mask = v_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_135, end_mask = v_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_22_stride_0, update = linear_43_cast_fp16, x = coreml_update_state_109)[name = string("v_cache2_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_22_cast_fp16, input = v_cache2)[name = string("coreml_update_state_111_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_111 = read_state(input = v_cache2)[name = string("coreml_update_state_111")];
+            tensor<fp16, [1280, 1280]> var_879_to_fp16 = const()[name = string("op_879_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180942656)))];
+            tensor<fp16, [1, ?, 1280]> linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_879_to_fp16, x = audio_data)[name = string("linear_44_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_883_to_fp16 = const()[name = string("op_883_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184219520)))];
+            tensor<fp16, [1280]> var_884_to_fp16 = const()[name = string("op_884_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187496384)))];
+            tensor<fp16, [1, ?, 1280]> linear_45_cast_fp16 = linear(bias = var_884_to_fp16, weight = var_883_to_fp16, x = audio_data)[name = string("linear_45_cast_fp16")];
+            tensor<int32, [3]> var_886_shape_cast_fp16 = shape(x = linear_44_cast_fp16)[name = string("op_886_shape_cast_fp16")];
+            int32 gather_44_axis_0 = const()[name = string("gather_44_axis_0"), val = int32(0)];
+            int32 gather_44_batch_dims_0 = const()[name = string("gather_44_batch_dims_0"), val = int32(0)];
+            bool gather_44_validate_indices_0 = const()[name = string("gather_44_validate_indices_0"), val = bool(false)];
+            string var_886_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_886_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_44_to_uint16 = const()[name = string("select_44_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_886_shape_cast_fp16_to_uint16 = cast(dtype = var_886_shape_cast_fp16_to_uint16_dtype_0, x = var_886_shape_cast_fp16)[name = string("cast_111")];
+            uint16 gather_44_cast_uint16 = gather(axis = gather_44_axis_0, batch_dims = gather_44_batch_dims_0, indices = select_44_to_uint16, validate_indices = gather_44_validate_indices_0, x = var_886_shape_cast_fp16_to_uint16)[name = string("gather_44_cast_uint16")];
+            string gather_44_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_44_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_187_axes_0 = const()[name = string("expand_dims_187_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_44_cast_uint16_to_int32 = cast(dtype = gather_44_cast_uint16_to_int32_dtype_0, x = gather_44_cast_uint16)[name = string("cast_110")];
+            tensor<int32, [1]> expand_dims_187 = expand_dims(axes = expand_dims_187_axes_0, x = gather_44_cast_uint16_to_int32)[name = string("expand_dims_187")];
+            tensor<int32, [4]> concat_137 = const()[name = string("concat_137"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [1]> concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_138_values1_0 = const()[name = string("concat_138_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_138_values3_0 = const()[name = string("concat_138_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)];
+            bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, concat_138_values1_0, expand_dims_187, concat_138_values3_0))[name = string("concat_138")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_137, begin_mask = k_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_138, end_mask = k_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_23_stride_0, update = linear_44_cast_fp16, x = coreml_update_state_110)[name = string("k_cache2_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_23_cast_fp16, input = k_cache2)[name = string("coreml_update_state_112_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_112 = read_state(input = k_cache2)[name = string("coreml_update_state_112")];
+            tensor<int32, [3]> var_891_shape_cast_fp16 = shape(x = linear_45_cast_fp16)[name = string("op_891_shape_cast_fp16")];
+            int32 gather_45_axis_0 = const()[name = string("gather_45_axis_0"), val = int32(0)];
+            int32 gather_45_batch_dims_0 = const()[name = string("gather_45_batch_dims_0"), val = int32(0)];
+            bool gather_45_validate_indices_0 = const()[name = string("gather_45_validate_indices_0"), val = bool(false)];
+            string var_891_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_891_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_45_to_uint16 = const()[name = string("select_45_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_891_shape_cast_fp16_to_uint16 = cast(dtype = var_891_shape_cast_fp16_to_uint16_dtype_0, x = var_891_shape_cast_fp16)[name = string("cast_109")];
+            uint16 gather_45_cast_uint16 = gather(axis = gather_45_axis_0, batch_dims = gather_45_batch_dims_0, indices = select_45_to_uint16, validate_indices = gather_45_validate_indices_0, x = var_891_shape_cast_fp16_to_uint16)[name = string("gather_45_cast_uint16")];
+            string gather_45_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_45_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_191_axes_0 = const()[name = string("expand_dims_191_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_45_cast_uint16_to_int32 = cast(dtype = gather_45_cast_uint16_to_int32_dtype_0, x = gather_45_cast_uint16)[name = string("cast_108")];
+            tensor<int32, [1]> expand_dims_191 = expand_dims(axes = expand_dims_191_axes_0, x = gather_45_cast_uint16_to_int32)[name = string("expand_dims_191")];
+            tensor<int32, [4]> concat_140 = const()[name = string("concat_140"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [1]> concat_141_values0_0 = const()[name = string("concat_141_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_141_values1_0 = const()[name = string("concat_141_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_141_values3_0 = const()[name = string("concat_141_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_141_axis_0 = const()[name = string("concat_141_axis_0"), val = int32(0)];
+            bool concat_141_interleave_0 = const()[name = string("concat_141_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_141 = concat(axis = concat_141_axis_0, interleave = concat_141_interleave_0, values = (concat_141_values0_0, concat_141_values1_0, expand_dims_191, concat_141_values3_0))[name = string("concat_141")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_140, begin_mask = v_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_141, end_mask = v_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_23_stride_0, update = linear_45_cast_fp16, x = coreml_update_state_111)[name = string("v_cache2_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_23_cast_fp16, input = v_cache2)[name = string("coreml_update_state_113_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_113 = read_state(input = v_cache2)[name = string("coreml_update_state_113")];
+            tensor<fp16, [1280, 1280]> var_913_to_fp16 = const()[name = string("op_913_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187499008)))];
+            tensor<fp16, [1, ?, 1280]> linear_46_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_913_to_fp16, x = audio_data)[name = string("linear_46_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_917_to_fp16 = const()[name = string("op_917_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190775872)))];
+            tensor<fp16, [1280]> var_918_to_fp16 = const()[name = string("op_918_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194052736)))];
+            tensor<fp16, [1, ?, 1280]> linear_47_cast_fp16 = linear(bias = var_918_to_fp16, weight = var_917_to_fp16, x = audio_data)[name = string("linear_47_cast_fp16")];
+            tensor<int32, [3]> var_920_shape_cast_fp16 = shape(x = linear_46_cast_fp16)[name = string("op_920_shape_cast_fp16")];
+            int32 gather_46_axis_0 = const()[name = string("gather_46_axis_0"), val = int32(0)];
+            int32 gather_46_batch_dims_0 = const()[name = string("gather_46_batch_dims_0"), val = int32(0)];
+            bool gather_46_validate_indices_0 = const()[name = string("gather_46_validate_indices_0"), val = bool(false)];
+            string var_920_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_920_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_46_to_uint16 = const()[name = string("select_46_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_920_shape_cast_fp16_to_uint16 = cast(dtype = var_920_shape_cast_fp16_to_uint16_dtype_0, x = var_920_shape_cast_fp16)[name = string("cast_107")];
+            uint16 gather_46_cast_uint16 = gather(axis = gather_46_axis_0, batch_dims = gather_46_batch_dims_0, indices = select_46_to_uint16, validate_indices = gather_46_validate_indices_0, x = var_920_shape_cast_fp16_to_uint16)[name = string("gather_46_cast_uint16")];
+            string gather_46_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_46_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_46_cast_uint16_to_int32 = cast(dtype = gather_46_cast_uint16_to_int32_dtype_0, x = gather_46_cast_uint16)[name = string("cast_106")];
+            tensor<int32, [1]> expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = gather_46_cast_uint16_to_int32)[name = string("expand_dims_195")];
+            tensor<int32, [4]> concat_143 = const()[name = string("concat_143"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [1]> concat_144_values0_0 = const()[name = string("concat_144_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_144_values1_0 = const()[name = string("concat_144_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_144_values3_0 = const()[name = string("concat_144_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_144_axis_0 = const()[name = string("concat_144_axis_0"), val = int32(0)];
+            bool concat_144_interleave_0 = const()[name = string("concat_144_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_144 = concat(axis = concat_144_axis_0, interleave = concat_144_interleave_0, values = (concat_144_values0_0, concat_144_values1_0, expand_dims_195, concat_144_values3_0))[name = string("concat_144")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_143, begin_mask = k_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_144, end_mask = k_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_24_stride_0, update = linear_46_cast_fp16, x = coreml_update_state_112)[name = string("k_cache2_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_24_cast_fp16, input = k_cache2)[name = string("coreml_update_state_114_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_114 = read_state(input = k_cache2)[name = string("coreml_update_state_114")];
+            tensor<int32, [3]> var_925_shape_cast_fp16 = shape(x = linear_47_cast_fp16)[name = string("op_925_shape_cast_fp16")];
+            int32 gather_47_axis_0 = const()[name = string("gather_47_axis_0"), val = int32(0)];
+            int32 gather_47_batch_dims_0 = const()[name = string("gather_47_batch_dims_0"), val = int32(0)];
+            bool gather_47_validate_indices_0 = const()[name = string("gather_47_validate_indices_0"), val = bool(false)];
+            string var_925_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_925_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_47_to_uint16 = const()[name = string("select_47_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_925_shape_cast_fp16_to_uint16 = cast(dtype = var_925_shape_cast_fp16_to_uint16_dtype_0, x = var_925_shape_cast_fp16)[name = string("cast_105")];
+            uint16 gather_47_cast_uint16 = gather(axis = gather_47_axis_0, batch_dims = gather_47_batch_dims_0, indices = select_47_to_uint16, validate_indices = gather_47_validate_indices_0, x = var_925_shape_cast_fp16_to_uint16)[name = string("gather_47_cast_uint16")];
+            string gather_47_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_47_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_199_axes_0 = const()[name = string("expand_dims_199_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_47_cast_uint16_to_int32 = cast(dtype = gather_47_cast_uint16_to_int32_dtype_0, x = gather_47_cast_uint16)[name = string("cast_104")];
+            tensor<int32, [1]> expand_dims_199 = expand_dims(axes = expand_dims_199_axes_0, x = gather_47_cast_uint16_to_int32)[name = string("expand_dims_199")];
+            tensor<int32, [4]> concat_146 = const()[name = string("concat_146"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [1]> concat_147_values0_0 = const()[name = string("concat_147_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)];
+            bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (concat_147_values0_0, concat_147_values1_0, expand_dims_199, concat_147_values3_0))[name = string("concat_147")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_146, begin_mask = v_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_147, end_mask = v_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_24_stride_0, update = linear_47_cast_fp16, x = coreml_update_state_113)[name = string("v_cache2_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_24_cast_fp16, input = v_cache2)[name = string("coreml_update_state_115_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_115 = read_state(input = v_cache2)[name = string("coreml_update_state_115")];
+            tensor<fp16, [1280, 1280]> var_947_to_fp16 = const()[name = string("op_947_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194055360)))];
+            tensor<fp16, [1, ?, 1280]> linear_48_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_947_to_fp16, x = audio_data)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_951_to_fp16 = const()[name = string("op_951_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197332224)))];
+            tensor<fp16, [1280]> var_952_to_fp16 = const()[name = string("op_952_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200609088)))];
+            tensor<fp16, [1, ?, 1280]> linear_49_cast_fp16 = linear(bias = var_952_to_fp16, weight = var_951_to_fp16, x = audio_data)[name = string("linear_49_cast_fp16")];
+            tensor<int32, [3]> var_954_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_954_shape_cast_fp16")];
+            int32 gather_48_axis_0 = const()[name = string("gather_48_axis_0"), val = int32(0)];
+            int32 gather_48_batch_dims_0 = const()[name = string("gather_48_batch_dims_0"), val = int32(0)];
+            bool gather_48_validate_indices_0 = const()[name = string("gather_48_validate_indices_0"), val = bool(false)];
+            string var_954_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_954_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_48_to_uint16 = const()[name = string("select_48_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_954_shape_cast_fp16_to_uint16 = cast(dtype = var_954_shape_cast_fp16_to_uint16_dtype_0, x = var_954_shape_cast_fp16)[name = string("cast_103")];
+            uint16 gather_48_cast_uint16 = gather(axis = gather_48_axis_0, batch_dims = gather_48_batch_dims_0, indices = select_48_to_uint16, validate_indices = gather_48_validate_indices_0, x = var_954_shape_cast_fp16_to_uint16)[name = string("gather_48_cast_uint16")];
+            string gather_48_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_48_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_203_axes_0 = const()[name = string("expand_dims_203_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_48_cast_uint16_to_int32 = cast(dtype = gather_48_cast_uint16_to_int32_dtype_0, x = gather_48_cast_uint16)[name = string("cast_102")];
+            tensor<int32, [1]> expand_dims_203 = expand_dims(axes = expand_dims_203_axes_0, x = gather_48_cast_uint16_to_int32)[name = string("expand_dims_203")];
+            tensor<int32, [4]> concat_149 = const()[name = string("concat_149"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [1]> concat_150_values0_0 = const()[name = string("concat_150_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_150_values1_0 = const()[name = string("concat_150_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_150_values3_0 = const()[name = string("concat_150_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)];
+            bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (concat_150_values0_0, concat_150_values1_0, expand_dims_203, concat_150_values3_0))[name = string("concat_150")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_25_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_25_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_25_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_25_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_149, begin_mask = k_cache2_internal_tensor_assign_25_begin_mask_0, end = concat_150, end_mask = k_cache2_internal_tensor_assign_25_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_25_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_25_stride_0, update = linear_48_cast_fp16, x = coreml_update_state_114)[name = string("k_cache2_internal_tensor_assign_25_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_25_cast_fp16, input = k_cache2)[name = string("coreml_update_state_116_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_116 = read_state(input = k_cache2)[name = string("coreml_update_state_116")];
+            tensor<int32, [3]> var_959_shape_cast_fp16 = shape(x = linear_49_cast_fp16)[name = string("op_959_shape_cast_fp16")];
+            int32 gather_49_axis_0 = const()[name = string("gather_49_axis_0"), val = int32(0)];
+            int32 gather_49_batch_dims_0 = const()[name = string("gather_49_batch_dims_0"), val = int32(0)];
+            bool gather_49_validate_indices_0 = const()[name = string("gather_49_validate_indices_0"), val = bool(false)];
+            string var_959_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_959_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_49_to_uint16 = const()[name = string("select_49_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_959_shape_cast_fp16_to_uint16 = cast(dtype = var_959_shape_cast_fp16_to_uint16_dtype_0, x = var_959_shape_cast_fp16)[name = string("cast_101")];
+            uint16 gather_49_cast_uint16 = gather(axis = gather_49_axis_0, batch_dims = gather_49_batch_dims_0, indices = select_49_to_uint16, validate_indices = gather_49_validate_indices_0, x = var_959_shape_cast_fp16_to_uint16)[name = string("gather_49_cast_uint16")];
+            string gather_49_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_49_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_207_axes_0 = const()[name = string("expand_dims_207_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_49_cast_uint16_to_int32 = cast(dtype = gather_49_cast_uint16_to_int32_dtype_0, x = gather_49_cast_uint16)[name = string("cast_100")];
+            tensor<int32, [1]> expand_dims_207 = expand_dims(axes = expand_dims_207_axes_0, x = gather_49_cast_uint16_to_int32)[name = string("expand_dims_207")];
+            tensor<int32, [4]> concat_152 = const()[name = string("concat_152"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [1]> concat_153_values0_0 = const()[name = string("concat_153_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_153_values1_0 = const()[name = string("concat_153_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_153_values3_0 = const()[name = string("concat_153_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_153_axis_0 = const()[name = string("concat_153_axis_0"), val = int32(0)];
+            bool concat_153_interleave_0 = const()[name = string("concat_153_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_153 = concat(axis = concat_153_axis_0, interleave = concat_153_interleave_0, values = (concat_153_values0_0, concat_153_values1_0, expand_dims_207, concat_153_values3_0))[name = string("concat_153")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_25_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_25_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_25_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_25_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_cache2_internal_tensor_assign_25_begin_mask_0, end = concat_153, end_mask = v_cache2_internal_tensor_assign_25_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_25_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_25_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_115)[name = string("v_cache2_internal_tensor_assign_25_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_25_cast_fp16, input = v_cache2)[name = string("coreml_update_state_117_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_117 = read_state(input = v_cache2)[name = string("coreml_update_state_117")];
+            tensor<fp16, [1280, 1280]> var_981_to_fp16 = const()[name = string("op_981_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200611712)))];
+            tensor<fp16, [1, ?, 1280]> linear_50_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_981_to_fp16, x = audio_data)[name = string("linear_50_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_985_to_fp16 = const()[name = string("op_985_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203888576)))];
+            tensor<fp16, [1280]> var_986_to_fp16 = const()[name = string("op_986_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207165440)))];
+            tensor<fp16, [1, ?, 1280]> linear_51_cast_fp16 = linear(bias = var_986_to_fp16, weight = var_985_to_fp16, x = audio_data)[name = string("linear_51_cast_fp16")];
+            tensor<int32, [3]> var_988_shape_cast_fp16 = shape(x = linear_50_cast_fp16)[name = string("op_988_shape_cast_fp16")];
+            int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)];
+            int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)];
+            bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)];
+            string var_988_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_988_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_988_shape_cast_fp16_to_uint16 = cast(dtype = var_988_shape_cast_fp16_to_uint16_dtype_0, x = var_988_shape_cast_fp16)[name = string("cast_99")];
+            uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_988_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")];
+            string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_98")];
+            tensor<int32, [1]> expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = gather_50_cast_uint16_to_int32)[name = string("expand_dims_211")];
+            tensor<int32, [4]> concat_155 = const()[name = string("concat_155"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [1]> concat_156_values0_0 = const()[name = string("concat_156_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)];
+            bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (concat_156_values0_0, concat_156_values1_0, expand_dims_211, concat_156_values3_0))[name = string("concat_156")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_26_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_26_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_26_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_26_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_155, begin_mask = k_cache2_internal_tensor_assign_26_begin_mask_0, end = concat_156, end_mask = k_cache2_internal_tensor_assign_26_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_26_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_26_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_116)[name = string("k_cache2_internal_tensor_assign_26_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_26_cast_fp16, input = k_cache2)[name = string("coreml_update_state_118_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_118 = read_state(input = k_cache2)[name = string("coreml_update_state_118")];
+            tensor<int32, [3]> var_993_shape_cast_fp16 = shape(x = linear_51_cast_fp16)[name = string("op_993_shape_cast_fp16")];
+            int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)];
+            int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)];
+            bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)];
+            string var_993_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_993_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_51_to_uint16 = const()[name = string("select_51_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_993_shape_cast_fp16_to_uint16 = cast(dtype = var_993_shape_cast_fp16_to_uint16_dtype_0, x = var_993_shape_cast_fp16)[name = string("cast_97")];
+            uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_993_shape_cast_fp16_to_uint16)[name = string("gather_51_cast_uint16")];
+            string gather_51_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_51_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_215_axes_0 = const()[name = string("expand_dims_215_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_51_cast_uint16_to_int32 = cast(dtype = gather_51_cast_uint16_to_int32_dtype_0, x = gather_51_cast_uint16)[name = string("cast_96")];
+            tensor<int32, [1]> expand_dims_215 = expand_dims(axes = expand_dims_215_axes_0, x = gather_51_cast_uint16_to_int32)[name = string("expand_dims_215")];
+            tensor<int32, [4]> concat_158 = const()[name = string("concat_158"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [1]> concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)];
+            bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_215, concat_159_values3_0))[name = string("concat_159")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_26_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_26_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_26_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_26_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache2_internal_tensor_assign_26_begin_mask_0, end = concat_159, end_mask = v_cache2_internal_tensor_assign_26_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_26_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_26_stride_0, update = linear_51_cast_fp16, x = coreml_update_state_117)[name = string("v_cache2_internal_tensor_assign_26_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_26_cast_fp16, input = v_cache2)[name = string("coreml_update_state_119_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_119 = read_state(input = v_cache2)[name = string("coreml_update_state_119")];
+            tensor<fp16, [1280, 1280]> var_1015_to_fp16 = const()[name = string("op_1015_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207168064)))];
+            tensor<fp16, [1, ?, 1280]> linear_52_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1015_to_fp16, x = audio_data)[name = string("linear_52_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1019_to_fp16 = const()[name = string("op_1019_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210444928)))];
+            tensor<fp16, [1280]> var_1020_to_fp16 = const()[name = string("op_1020_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213721792)))];
+            tensor<fp16, [1, ?, 1280]> linear_53_cast_fp16 = linear(bias = var_1020_to_fp16, weight = var_1019_to_fp16, x = audio_data)[name = string("linear_53_cast_fp16")];
+            tensor<int32, [3]> var_1022_shape_cast_fp16 = shape(x = linear_52_cast_fp16)[name = string("op_1022_shape_cast_fp16")];
+            int32 gather_52_axis_0 = const()[name = string("gather_52_axis_0"), val = int32(0)];
+            int32 gather_52_batch_dims_0 = const()[name = string("gather_52_batch_dims_0"), val = int32(0)];
+            bool gather_52_validate_indices_0 = const()[name = string("gather_52_validate_indices_0"), val = bool(false)];
+            string var_1022_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1022_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_52_to_uint16 = const()[name = string("select_52_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1022_shape_cast_fp16_to_uint16 = cast(dtype = var_1022_shape_cast_fp16_to_uint16_dtype_0, x = var_1022_shape_cast_fp16)[name = string("cast_95")];
+            uint16 gather_52_cast_uint16 = gather(axis = gather_52_axis_0, batch_dims = gather_52_batch_dims_0, indices = select_52_to_uint16, validate_indices = gather_52_validate_indices_0, x = var_1022_shape_cast_fp16_to_uint16)[name = string("gather_52_cast_uint16")];
+            string gather_52_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_52_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_219_axes_0 = const()[name = string("expand_dims_219_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_52_cast_uint16_to_int32 = cast(dtype = gather_52_cast_uint16_to_int32_dtype_0, x = gather_52_cast_uint16)[name = string("cast_94")];
+            tensor<int32, [1]> expand_dims_219 = expand_dims(axes = expand_dims_219_axes_0, x = gather_52_cast_uint16_to_int32)[name = string("expand_dims_219")];
+            tensor<int32, [4]> concat_161 = const()[name = string("concat_161"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [1]> concat_162_values0_0 = const()[name = string("concat_162_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_162_values1_0 = const()[name = string("concat_162_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_162_values3_0 = const()[name = string("concat_162_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_162_axis_0 = const()[name = string("concat_162_axis_0"), val = int32(0)];
+            bool concat_162_interleave_0 = const()[name = string("concat_162_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_162 = concat(axis = concat_162_axis_0, interleave = concat_162_interleave_0, values = (concat_162_values0_0, concat_162_values1_0, expand_dims_219, concat_162_values3_0))[name = string("concat_162")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_27_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_27_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_27_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_27_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_161, begin_mask = k_cache2_internal_tensor_assign_27_begin_mask_0, end = concat_162, end_mask = k_cache2_internal_tensor_assign_27_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_27_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_27_stride_0, update = linear_52_cast_fp16, x = coreml_update_state_118)[name = string("k_cache2_internal_tensor_assign_27_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_27_cast_fp16, input = k_cache2)[name = string("coreml_update_state_120_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_120 = read_state(input = k_cache2)[name = string("coreml_update_state_120")];
+            tensor<int32, [3]> var_1027_shape_cast_fp16 = shape(x = linear_53_cast_fp16)[name = string("op_1027_shape_cast_fp16")];
+            int32 gather_53_axis_0 = const()[name = string("gather_53_axis_0"), val = int32(0)];
+            int32 gather_53_batch_dims_0 = const()[name = string("gather_53_batch_dims_0"), val = int32(0)];
+            bool gather_53_validate_indices_0 = const()[name = string("gather_53_validate_indices_0"), val = bool(false)];
+            string var_1027_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1027_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_53_to_uint16 = const()[name = string("select_53_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1027_shape_cast_fp16_to_uint16 = cast(dtype = var_1027_shape_cast_fp16_to_uint16_dtype_0, x = var_1027_shape_cast_fp16)[name = string("cast_93")];
+            uint16 gather_53_cast_uint16 = gather(axis = gather_53_axis_0, batch_dims = gather_53_batch_dims_0, indices = select_53_to_uint16, validate_indices = gather_53_validate_indices_0, x = var_1027_shape_cast_fp16_to_uint16)[name = string("gather_53_cast_uint16")];
+            string gather_53_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_53_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_223_axes_0 = const()[name = string("expand_dims_223_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_53_cast_uint16_to_int32 = cast(dtype = gather_53_cast_uint16_to_int32_dtype_0, x = gather_53_cast_uint16)[name = string("cast_92")];
+            tensor<int32, [1]> expand_dims_223 = expand_dims(axes = expand_dims_223_axes_0, x = gather_53_cast_uint16_to_int32)[name = string("expand_dims_223")];
+            tensor<int32, [4]> concat_164 = const()[name = string("concat_164"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [1]> concat_165_values0_0 = const()[name = string("concat_165_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)];
+            bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (concat_165_values0_0, concat_165_values1_0, expand_dims_223, concat_165_values3_0))[name = string("concat_165")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_27_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_27_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_27_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_27_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_164, begin_mask = v_cache2_internal_tensor_assign_27_begin_mask_0, end = concat_165, end_mask = v_cache2_internal_tensor_assign_27_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_27_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_27_stride_0, update = linear_53_cast_fp16, x = coreml_update_state_119)[name = string("v_cache2_internal_tensor_assign_27_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_27_cast_fp16, input = v_cache2)[name = string("coreml_update_state_121_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_121 = read_state(input = v_cache2)[name = string("coreml_update_state_121")];
+            tensor<fp16, [1280, 1280]> var_1049_to_fp16 = const()[name = string("op_1049_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213724416)))];
+            tensor<fp16, [1, ?, 1280]> linear_54_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1049_to_fp16, x = audio_data)[name = string("linear_54_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1053_to_fp16 = const()[name = string("op_1053_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217001280)))];
+            tensor<fp16, [1280]> var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220278144)))];
+            tensor<fp16, [1, ?, 1280]> linear_55_cast_fp16 = linear(bias = var_1054_to_fp16, weight = var_1053_to_fp16, x = audio_data)[name = string("linear_55_cast_fp16")];
+            tensor<int32, [3]> var_1056_shape_cast_fp16 = shape(x = linear_54_cast_fp16)[name = string("op_1056_shape_cast_fp16")];
+            int32 gather_54_axis_0 = const()[name = string("gather_54_axis_0"), val = int32(0)];
+            int32 gather_54_batch_dims_0 = const()[name = string("gather_54_batch_dims_0"), val = int32(0)];
+            bool gather_54_validate_indices_0 = const()[name = string("gather_54_validate_indices_0"), val = bool(false)];
+            string var_1056_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1056_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_54_to_uint16 = const()[name = string("select_54_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1056_shape_cast_fp16_to_uint16 = cast(dtype = var_1056_shape_cast_fp16_to_uint16_dtype_0, x = var_1056_shape_cast_fp16)[name = string("cast_91")];
+            uint16 gather_54_cast_uint16 = gather(axis = gather_54_axis_0, batch_dims = gather_54_batch_dims_0, indices = select_54_to_uint16, validate_indices = gather_54_validate_indices_0, x = var_1056_shape_cast_fp16_to_uint16)[name = string("gather_54_cast_uint16")];
+            string gather_54_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_54_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_54_cast_uint16_to_int32 = cast(dtype = gather_54_cast_uint16_to_int32_dtype_0, x = gather_54_cast_uint16)[name = string("cast_90")];
+            tensor<int32, [1]> expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = gather_54_cast_uint16_to_int32)[name = string("expand_dims_227")];
+            tensor<int32, [4]> concat_167 = const()[name = string("concat_167"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [1]> concat_168_values0_0 = const()[name = string("concat_168_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_168_values1_0 = const()[name = string("concat_168_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_168_values3_0 = const()[name = string("concat_168_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)];
+            bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (concat_168_values0_0, concat_168_values1_0, expand_dims_227, concat_168_values3_0))[name = string("concat_168")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_28_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_28_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_28_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_28_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_167, begin_mask = k_cache2_internal_tensor_assign_28_begin_mask_0, end = concat_168, end_mask = k_cache2_internal_tensor_assign_28_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_28_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_28_stride_0, update = linear_54_cast_fp16, x = coreml_update_state_120)[name = string("k_cache2_internal_tensor_assign_28_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_28_cast_fp16, input = k_cache2)[name = string("coreml_update_state_122_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_122 = read_state(input = k_cache2)[name = string("coreml_update_state_122")];
+            tensor<int32, [3]> var_1061_shape_cast_fp16 = shape(x = linear_55_cast_fp16)[name = string("op_1061_shape_cast_fp16")];
+            int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)];
+            int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)];
+            bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)];
+            string var_1061_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1061_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1061_shape_cast_fp16_to_uint16 = cast(dtype = var_1061_shape_cast_fp16_to_uint16_dtype_0, x = var_1061_shape_cast_fp16)[name = string("cast_89")];
+            uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_1061_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")];
+            string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_231_axes_0 = const()[name = string("expand_dims_231_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_88")];
+            tensor<int32, [1]> expand_dims_231 = expand_dims(axes = expand_dims_231_axes_0, x = gather_55_cast_uint16_to_int32)[name = string("expand_dims_231")];
+            tensor<int32, [4]> concat_170 = const()[name = string("concat_170"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [1]> concat_171_values0_0 = const()[name = string("concat_171_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_171_values1_0 = const()[name = string("concat_171_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)];
+            bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (concat_171_values0_0, concat_171_values1_0, expand_dims_231, concat_171_values3_0))[name = string("concat_171")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_28_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_28_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_28_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_28_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_170, begin_mask = v_cache2_internal_tensor_assign_28_begin_mask_0, end = concat_171, end_mask = v_cache2_internal_tensor_assign_28_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_28_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_28_stride_0, update = linear_55_cast_fp16, x = coreml_update_state_121)[name = string("v_cache2_internal_tensor_assign_28_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_28_cast_fp16, input = v_cache2)[name = string("coreml_update_state_123_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_123 = read_state(input = v_cache2)[name = string("coreml_update_state_123")];
+            tensor<fp16, [1280, 1280]> var_1083_to_fp16 = const()[name = string("op_1083_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220280768)))];
+            tensor<fp16, [1, ?, 1280]> linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1083_to_fp16, x = audio_data)[name = string("linear_56_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1087_to_fp16 = const()[name = string("op_1087_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223557632)))];
+            tensor<fp16, [1280]> var_1088_to_fp16 = const()[name = string("op_1088_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226834496)))];
+            tensor<fp16, [1, ?, 1280]> linear_57_cast_fp16 = linear(bias = var_1088_to_fp16, weight = var_1087_to_fp16, x = audio_data)[name = string("linear_57_cast_fp16")];
+            tensor<int32, [3]> var_1090_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1090_shape_cast_fp16")];
+            int32 gather_56_axis_0 = const()[name = string("gather_56_axis_0"), val = int32(0)];
+            int32 gather_56_batch_dims_0 = const()[name = string("gather_56_batch_dims_0"), val = int32(0)];
+            bool gather_56_validate_indices_0 = const()[name = string("gather_56_validate_indices_0"), val = bool(false)];
+            string var_1090_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1090_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_56_to_uint16 = const()[name = string("select_56_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1090_shape_cast_fp16_to_uint16 = cast(dtype = var_1090_shape_cast_fp16_to_uint16_dtype_0, x = var_1090_shape_cast_fp16)[name = string("cast_87")];
+            uint16 gather_56_cast_uint16 = gather(axis = gather_56_axis_0, batch_dims = gather_56_batch_dims_0, indices = select_56_to_uint16, validate_indices = gather_56_validate_indices_0, x = var_1090_shape_cast_fp16_to_uint16)[name = string("gather_56_cast_uint16")];
+            string gather_56_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_56_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_235_axes_0 = const()[name = string("expand_dims_235_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_56_cast_uint16_to_int32 = cast(dtype = gather_56_cast_uint16_to_int32_dtype_0, x = gather_56_cast_uint16)[name = string("cast_86")];
+            tensor<int32, [1]> expand_dims_235 = expand_dims(axes = expand_dims_235_axes_0, x = gather_56_cast_uint16_to_int32)[name = string("expand_dims_235")];
+            tensor<int32, [4]> concat_173 = const()[name = string("concat_173"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [1]> concat_174_values0_0 = const()[name = string("concat_174_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_174_values1_0 = const()[name = string("concat_174_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_174_values3_0 = const()[name = string("concat_174_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_174_axis_0 = const()[name = string("concat_174_axis_0"), val = int32(0)];
+            bool concat_174_interleave_0 = const()[name = string("concat_174_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_174 = concat(axis = concat_174_axis_0, interleave = concat_174_interleave_0, values = (concat_174_values0_0, concat_174_values1_0, expand_dims_235, concat_174_values3_0))[name = string("concat_174")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_29_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_29_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_29_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_29_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_173, begin_mask = k_cache2_internal_tensor_assign_29_begin_mask_0, end = concat_174, end_mask = k_cache2_internal_tensor_assign_29_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_29_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_29_stride_0, update = linear_56_cast_fp16, x = coreml_update_state_122)[name = string("k_cache2_internal_tensor_assign_29_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_29_cast_fp16, input = k_cache2)[name = string("coreml_update_state_124_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_124 = read_state(input = k_cache2)[name = string("coreml_update_state_124")];
+            tensor<int32, [3]> var_1095_shape_cast_fp16 = shape(x = linear_57_cast_fp16)[name = string("op_1095_shape_cast_fp16")];
+            int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)];
+            int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)];
+            bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)];
+            string var_1095_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1095_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_57_to_uint16 = const()[name = string("select_57_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1095_shape_cast_fp16_to_uint16 = cast(dtype = var_1095_shape_cast_fp16_to_uint16_dtype_0, x = var_1095_shape_cast_fp16)[name = string("cast_85")];
+            uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_1095_shape_cast_fp16_to_uint16)[name = string("gather_57_cast_uint16")];
+            string gather_57_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_57_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_239_axes_0 = const()[name = string("expand_dims_239_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_57_cast_uint16_to_int32 = cast(dtype = gather_57_cast_uint16_to_int32_dtype_0, x = gather_57_cast_uint16)[name = string("cast_84")];
+            tensor<int32, [1]> expand_dims_239 = expand_dims(axes = expand_dims_239_axes_0, x = gather_57_cast_uint16_to_int32)[name = string("expand_dims_239")];
+            tensor<int32, [4]> concat_176 = const()[name = string("concat_176"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [1]> concat_177_values0_0 = const()[name = string("concat_177_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_177_values1_0 = const()[name = string("concat_177_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_177_values3_0 = const()[name = string("concat_177_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_177_axis_0 = const()[name = string("concat_177_axis_0"), val = int32(0)];
+            bool concat_177_interleave_0 = const()[name = string("concat_177_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_177 = concat(axis = concat_177_axis_0, interleave = concat_177_interleave_0, values = (concat_177_values0_0, concat_177_values1_0, expand_dims_239, concat_177_values3_0))[name = string("concat_177")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_29_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_29_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_29_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_29_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_176, begin_mask = v_cache2_internal_tensor_assign_29_begin_mask_0, end = concat_177, end_mask = v_cache2_internal_tensor_assign_29_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_29_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_29_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_123)[name = string("v_cache2_internal_tensor_assign_29_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_29_cast_fp16, input = v_cache2)[name = string("coreml_update_state_125_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_125 = read_state(input = v_cache2)[name = string("coreml_update_state_125")];
+            tensor<fp16, [1280, 1280]> var_1117_to_fp16 = const()[name = string("op_1117_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226837120)))];
+            tensor<fp16, [1, ?, 1280]> linear_58_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1117_to_fp16, x = audio_data)[name = string("linear_58_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1121_to_fp16 = const()[name = string("op_1121_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230113984)))];
+            tensor<fp16, [1280]> var_1122_to_fp16 = const()[name = string("op_1122_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233390848)))];
+            tensor<fp16, [1, ?, 1280]> linear_59_cast_fp16 = linear(bias = var_1122_to_fp16, weight = var_1121_to_fp16, x = audio_data)[name = string("linear_59_cast_fp16")];
+            tensor<int32, [3]> var_1124_shape_cast_fp16 = shape(x = linear_58_cast_fp16)[name = string("op_1124_shape_cast_fp16")];
+            int32 gather_58_axis_0 = const()[name = string("gather_58_axis_0"), val = int32(0)];
+            int32 gather_58_batch_dims_0 = const()[name = string("gather_58_batch_dims_0"), val = int32(0)];
+            bool gather_58_validate_indices_0 = const()[name = string("gather_58_validate_indices_0"), val = bool(false)];
+            string var_1124_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1124_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_58_to_uint16 = const()[name = string("select_58_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1124_shape_cast_fp16_to_uint16 = cast(dtype = var_1124_shape_cast_fp16_to_uint16_dtype_0, x = var_1124_shape_cast_fp16)[name = string("cast_83")];
+            uint16 gather_58_cast_uint16 = gather(axis = gather_58_axis_0, batch_dims = gather_58_batch_dims_0, indices = select_58_to_uint16, validate_indices = gather_58_validate_indices_0, x = var_1124_shape_cast_fp16_to_uint16)[name = string("gather_58_cast_uint16")];
+            string gather_58_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_58_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_58_cast_uint16_to_int32 = cast(dtype = gather_58_cast_uint16_to_int32_dtype_0, x = gather_58_cast_uint16)[name = string("cast_82")];
+            tensor<int32, [1]> expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = gather_58_cast_uint16_to_int32)[name = string("expand_dims_243")];
+            tensor<int32, [4]> concat_179 = const()[name = string("concat_179"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [1]> concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)];
+            bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, concat_180_values1_0, expand_dims_243, concat_180_values3_0))[name = string("concat_180")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_30_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_30_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_30_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_30_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_179, begin_mask = k_cache2_internal_tensor_assign_30_begin_mask_0, end = concat_180, end_mask = k_cache2_internal_tensor_assign_30_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_30_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_30_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_124)[name = string("k_cache2_internal_tensor_assign_30_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_30_cast_fp16, input = k_cache2)[name = string("coreml_update_state_126_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_126 = read_state(input = k_cache2)[name = string("coreml_update_state_126")];
+            tensor<int32, [3]> var_1129_shape_cast_fp16 = shape(x = linear_59_cast_fp16)[name = string("op_1129_shape_cast_fp16")];
+            int32 gather_59_axis_0 = const()[name = string("gather_59_axis_0"), val = int32(0)];
+            int32 gather_59_batch_dims_0 = const()[name = string("gather_59_batch_dims_0"), val = int32(0)];
+            bool gather_59_validate_indices_0 = const()[name = string("gather_59_validate_indices_0"), val = bool(false)];
+            string var_1129_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1129_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_59_to_uint16 = const()[name = string("select_59_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1129_shape_cast_fp16_to_uint16 = cast(dtype = var_1129_shape_cast_fp16_to_uint16_dtype_0, x = var_1129_shape_cast_fp16)[name = string("cast_81")];
+            uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = select_59_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_1129_shape_cast_fp16_to_uint16)[name = string("gather_59_cast_uint16")];
+            string gather_59_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_59_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_247_axes_0 = const()[name = string("expand_dims_247_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_59_cast_uint16_to_int32 = cast(dtype = gather_59_cast_uint16_to_int32_dtype_0, x = gather_59_cast_uint16)[name = string("cast_80")];
+            tensor<int32, [1]> expand_dims_247 = expand_dims(axes = expand_dims_247_axes_0, x = gather_59_cast_uint16_to_int32)[name = string("expand_dims_247")];
+            tensor<int32, [4]> concat_182 = const()[name = string("concat_182"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [1]> concat_183_values0_0 = const()[name = string("concat_183_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)];
+            bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (concat_183_values0_0, concat_183_values1_0, expand_dims_247, concat_183_values3_0))[name = string("concat_183")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_30_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_30_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_30_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_30_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_182, begin_mask = v_cache2_internal_tensor_assign_30_begin_mask_0, end = concat_183, end_mask = v_cache2_internal_tensor_assign_30_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_30_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_30_stride_0, update = linear_59_cast_fp16, x = coreml_update_state_125)[name = string("v_cache2_internal_tensor_assign_30_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_30_cast_fp16, input = v_cache2)[name = string("coreml_update_state_127_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_127 = read_state(input = v_cache2)[name = string("coreml_update_state_127")];
+            tensor<fp16, [1280, 1280]> var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233393472)))];
+            tensor<fp16, [1, ?, 1280]> linear_60_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1151_to_fp16, x = audio_data)[name = string("linear_60_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1155_to_fp16 = const()[name = string("op_1155_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236670336)))];
+            tensor<fp16, [1280]> var_1156_to_fp16 = const()[name = string("op_1156_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239947200)))];
+            tensor<fp16, [1, ?, 1280]> linear_61_cast_fp16 = linear(bias = var_1156_to_fp16, weight = var_1155_to_fp16, x = audio_data)[name = string("linear_61_cast_fp16")];
+            tensor<int32, [3]> var_1158_shape_cast_fp16 = shape(x = linear_60_cast_fp16)[name = string("op_1158_shape_cast_fp16")];
+            int32 gather_60_axis_0 = const()[name = string("gather_60_axis_0"), val = int32(0)];
+            int32 gather_60_batch_dims_0 = const()[name = string("gather_60_batch_dims_0"), val = int32(0)];
+            bool gather_60_validate_indices_0 = const()[name = string("gather_60_validate_indices_0"), val = bool(false)];
+            string var_1158_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1158_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_60_to_uint16 = const()[name = string("select_60_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1158_shape_cast_fp16_to_uint16 = cast(dtype = var_1158_shape_cast_fp16_to_uint16_dtype_0, x = var_1158_shape_cast_fp16)[name = string("cast_79")];
+            uint16 gather_60_cast_uint16 = gather(axis = gather_60_axis_0, batch_dims = gather_60_batch_dims_0, indices = select_60_to_uint16, validate_indices = gather_60_validate_indices_0, x = var_1158_shape_cast_fp16_to_uint16)[name = string("gather_60_cast_uint16")];
+            string gather_60_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_60_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_251_axes_0 = const()[name = string("expand_dims_251_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_60_cast_uint16_to_int32 = cast(dtype = gather_60_cast_uint16_to_int32_dtype_0, x = gather_60_cast_uint16)[name = string("cast_78")];
+            tensor<int32, [1]> expand_dims_251 = expand_dims(axes = expand_dims_251_axes_0, x = gather_60_cast_uint16_to_int32)[name = string("expand_dims_251")];
+            tensor<int32, [4]> concat_185 = const()[name = string("concat_185"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [1]> concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_186_values1_0 = const()[name = string("concat_186_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_186_values3_0 = const()[name = string("concat_186_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)];
+            bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, concat_186_values1_0, expand_dims_251, concat_186_values3_0))[name = string("concat_186")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_31_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_31_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_31_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_31_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_185, begin_mask = k_cache2_internal_tensor_assign_31_begin_mask_0, end = concat_186, end_mask = k_cache2_internal_tensor_assign_31_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_31_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_31_stride_0, update = linear_60_cast_fp16, x = coreml_update_state_126)[name = string("k_cache2_internal_tensor_assign_31_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_31_cast_fp16, input = k_cache2)[name = string("coreml_update_state_128_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_128 = read_state(input = k_cache2)[name = string("coreml_update_state_128")];
+            tensor<int32, [3]> var_1163_shape_cast_fp16 = shape(x = linear_61_cast_fp16)[name = string("op_1163_shape_cast_fp16")];
+            int32 gather_61_axis_0 = const()[name = string("gather_61_axis_0"), val = int32(0)];
+            int32 gather_61_batch_dims_0 = const()[name = string("gather_61_batch_dims_0"), val = int32(0)];
+            bool gather_61_validate_indices_0 = const()[name = string("gather_61_validate_indices_0"), val = bool(false)];
+            string var_1163_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1163_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_61_to_uint16 = const()[name = string("select_61_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1163_shape_cast_fp16_to_uint16 = cast(dtype = var_1163_shape_cast_fp16_to_uint16_dtype_0, x = var_1163_shape_cast_fp16)[name = string("cast_77")];
+            uint16 gather_61_cast_uint16 = gather(axis = gather_61_axis_0, batch_dims = gather_61_batch_dims_0, indices = select_61_to_uint16, validate_indices = gather_61_validate_indices_0, x = var_1163_shape_cast_fp16_to_uint16)[name = string("gather_61_cast_uint16")];
+            string gather_61_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_61_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_255_axes_0 = const()[name = string("expand_dims_255_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_61_cast_uint16_to_int32 = cast(dtype = gather_61_cast_uint16_to_int32_dtype_0, x = gather_61_cast_uint16)[name = string("cast_76")];
+            tensor<int32, [1]> expand_dims_255 = expand_dims(axes = expand_dims_255_axes_0, x = gather_61_cast_uint16_to_int32)[name = string("expand_dims_255")];
+            tensor<int32, [4]> concat_188 = const()[name = string("concat_188"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [1]> concat_189_values0_0 = const()[name = string("concat_189_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_189_values1_0 = const()[name = string("concat_189_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_189_values3_0 = const()[name = string("concat_189_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_189_axis_0 = const()[name = string("concat_189_axis_0"), val = int32(0)];
+            bool concat_189_interleave_0 = const()[name = string("concat_189_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_189 = concat(axis = concat_189_axis_0, interleave = concat_189_interleave_0, values = (concat_189_values0_0, concat_189_values1_0, expand_dims_255, concat_189_values3_0))[name = string("concat_189")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_31_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_31_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_31_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_31_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_188, begin_mask = v_cache2_internal_tensor_assign_31_begin_mask_0, end = concat_189, end_mask = v_cache2_internal_tensor_assign_31_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_31_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_31_stride_0, update = linear_61_cast_fp16, x = coreml_update_state_127)[name = string("v_cache2_internal_tensor_assign_31_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_31_cast_fp16, input = v_cache2)[name = string("coreml_update_state_129_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_129 = read_state(input = v_cache2)[name = string("coreml_update_state_129")];
+            tensor<fp16, [1280, 1280]> var_1185_to_fp16 = const()[name = string("op_1185_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239949824)))];
+            tensor<fp16, [1, ?, 1280]> linear_62_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1185_to_fp16, x = audio_data)[name = string("linear_62_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243226688)))];
+            tensor<fp16, [1280]> var_1190_to_fp16 = const()[name = string("op_1190_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246503552)))];
+            tensor<fp16, [1, ?, 1280]> linear_63_cast_fp16 = linear(bias = var_1190_to_fp16, weight = var_1189_to_fp16, x = audio_data)[name = string("linear_63_cast_fp16")];
+            tensor<int32, [3]> var_1192_shape_cast_fp16 = shape(x = linear_62_cast_fp16)[name = string("op_1192_shape_cast_fp16")];
+            int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)];
+            int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)];
+            bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)];
+            string var_1192_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1192_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1192_shape_cast_fp16_to_uint16 = cast(dtype = var_1192_shape_cast_fp16_to_uint16_dtype_0, x = var_1192_shape_cast_fp16)[name = string("cast_75")];
+            uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1192_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")];
+            string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_74")];
+            tensor<int32, [1]> expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = gather_62_cast_uint16_to_int32)[name = string("expand_dims_259")];
+            tensor<int32, [4]> concat_191 = const()[name = string("concat_191"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [1]> concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_192_values1_0 = const()[name = string("concat_192_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_192_values3_0 = const()[name = string("concat_192_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)];
+            bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, concat_192_values1_0, expand_dims_259, concat_192_values3_0))[name = string("concat_192")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_32_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_32_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_32_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_32_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_191, begin_mask = k_cache2_internal_tensor_assign_32_begin_mask_0, end = concat_192, end_mask = k_cache2_internal_tensor_assign_32_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_32_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_32_stride_0, update = linear_62_cast_fp16, x = coreml_update_state_128)[name = string("k_cache2_internal_tensor_assign_32_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_32_cast_fp16, input = k_cache2)[name = string("coreml_update_state_130_write_state")];
+            tensor<int32, [3]> var_1197_shape_cast_fp16 = shape(x = linear_63_cast_fp16)[name = string("op_1197_shape_cast_fp16")];
+            int32 gather_63_axis_0 = const()[name = string("gather_63_axis_0"), val = int32(0)];
+            int32 gather_63_batch_dims_0 = const()[name = string("gather_63_batch_dims_0"), val = int32(0)];
+            bool gather_63_validate_indices_0 = const()[name = string("gather_63_validate_indices_0"), val = bool(false)];
+            string var_1197_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1197_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_63_to_uint16 = const()[name = string("select_63_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1197_shape_cast_fp16_to_uint16 = cast(dtype = var_1197_shape_cast_fp16_to_uint16_dtype_0, x = var_1197_shape_cast_fp16)[name = string("cast_73")];
+            uint16 gather_63_cast_uint16 = gather(axis = gather_63_axis_0, batch_dims = gather_63_batch_dims_0, indices = select_63_to_uint16, validate_indices = gather_63_validate_indices_0, x = var_1197_shape_cast_fp16_to_uint16)[name = string("gather_63_cast_uint16")];
+            string gather_63_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_63_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_263_axes_0 = const()[name = string("expand_dims_263_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_63_cast_uint16_to_int32 = cast(dtype = gather_63_cast_uint16_to_int32_dtype_0, x = gather_63_cast_uint16)[name = string("cast_72")];
+            tensor<int32, [1]> expand_dims_263 = expand_dims(axes = expand_dims_263_axes_0, x = gather_63_cast_uint16_to_int32)[name = string("expand_dims_263")];
+            tensor<int32, [4]> concat_194 = const()[name = string("concat_194"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [1]> concat_195_values0_0 = const()[name = string("concat_195_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_195_values1_0 = const()[name = string("concat_195_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_195_values3_0 = const()[name = string("concat_195_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)];
+            bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (concat_195_values0_0, concat_195_values1_0, expand_dims_263, concat_195_values3_0))[name = string("concat_195")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_32_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_32_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_32_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_32_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_194, begin_mask = v_cache2_internal_tensor_assign_32_begin_mask_0, end = concat_195, end_mask = v_cache2_internal_tensor_assign_32_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_32_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_32_stride_0, update = linear_63_cast_fp16, x = coreml_update_state_129)[name = string("v_cache2_internal_tensor_assign_32_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_32_cast_fp16, input = v_cache2)[name = string("coreml_update_state_131_write_state")];
+        } -> (dummy);
+}
\ No newline at end of file
diff --git a/large-v2/decoder_first.mlmodelc/weights/weight.bin b/large-v2/decoder_first.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..eeda721bfad61ab73d33a009d8b13c38c825cc19
--- /dev/null
+++ b/large-v2/decoder_first.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ac46c34d51832dd11fbc34c772a9a35a5fb4cace68406b7044dd4ba652dca1c
+size 246506176
diff --git a/large-v2/decoder_second.mlmodelc/analytics/coremldata.bin b/large-v2/decoder_second.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..52ba52f49e9149434ffb00691f5e035298ccf6a1
--- /dev/null
+++ b/large-v2/decoder_second.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1afd3cf2ab05eb2b7268afb62f418b5df01b6b5d60d746bdeec2b5ad8d760f65
+size 243
diff --git a/large-v2/decoder_second.mlmodelc/coremldata.bin b/large-v2/decoder_second.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06d384eab53d9890f25a9f07a76b3771dcd2b170
--- /dev/null
+++ b/large-v2/decoder_second.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5601244df54c60a16c26b761742867d06c6ef440ab8b0776ce5f6d1b4875c95
+size 487
diff --git a/large-v2/decoder_second.mlmodelc/metadata.json b/large-v2/decoder_second.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..722e4912d37eb8c71f0d55eb4ea48b33db80210d
--- /dev/null
+++ b/large-v2/decoder_second.mlmodelc/metadata.json
@@ -0,0 +1,127 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.linear" : 257,
+      "Ios18.readState" : 66,
+      "Ios18.expandDims" : 33,
+      "Ios18.sub" : 1,
+      "Ios18.matmul" : 128,
+      "Ios18.gelu" : 32,
+      "Ios18.gather" : 35,
+      "Ios18.concat" : 162,
+      "Shape" : 34,
+      "Ios18.add" : 161,
+      "Ios18.sliceUpdate" : 128,
+      "Ios18.sliceByIndex" : 257,
+      "Ios18.layerNorm" : 97,
+      "Ios18.cast" : 68,
+      "Ios18.transpose" : 256,
+      "Ios18.writeState" : 64,
+      "Ios18.reshape" : 256,
+      "Ios18.softmax" : 64,
+      "Ios18.mul" : 128
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 448 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 448, 1280]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 448 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 448, 1280]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 1500, 1280]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 1500, 1280]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Int32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Int32 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "token_data",
+        "shortDescription" : ""
+      },
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Float16 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "offset_mask",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_second",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/large-v2/decoder_second.mlmodelc/model.mil b/large-v2/decoder_second.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..897dc495c4bc16b52a2d3e9398cae3acf9ec05bc
--- /dev/null
+++ b/large-v2/decoder_second.mlmodelc/model.mil
@@ -0,0 +1,6298 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(state<tensor<fp16, [32, 1, 448, 1280]>> k_cache1, state<tensor<fp16, [32, 1, 1500, 1280]>> k_cache2, tensor<fp16, [1, ?]> offset_mask, tensor<int32, [1, ?]> token_data, state<tensor<fp16, [32, 1, 448, 1280]>> v_cache1, state<tensor<fp16, [32, 1, 1500, 1280]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] {
+            tensor<int32, [2]> var_78_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_78_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_78_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_78_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [2]> var_78_shape_cast_fp16_to_int16 = cast(dtype = var_78_shape_cast_fp16_to_int16_dtype_0, x = var_78_shape_cast_fp16)[name = string("cast_394")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_78_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [2]> var_82_shape = shape(x = token_data)[name = string("op_82_shape")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_82_shape_to_uint16_dtype_0 = const()[name = string("op_82_shape_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [2]> var_82_shape_to_uint16 = cast(dtype = var_82_shape_to_uint16_dtype_0, x = var_82_shape)[name = string("cast_392")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_82_shape_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_391")];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_393")];
+            int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")];
+            int32 var_154_axis_0 = const()[name = string("op_154_axis_0"), val = int32(0)];
+            int32 var_154_batch_dims_0 = const()[name = string("op_154_batch_dims_0"), val = int32(0)];
+            bool var_154_validate_indices_0 = const()[name = string("op_154_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [51865, 1280]> token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor<fp16, [51865, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, ?, 1280]> var_154_cast_fp16 = gather(axis = var_154_axis_0, batch_dims = var_154_batch_dims_0, indices = token_data, validate_indices = var_154_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_154_cast_fp16")];
+            int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)];
+            int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)];
+            bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")];
+            int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(1280)];
+            int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)];
+            bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")];
+            tensor<bool, [2]> var_157_end_mask_0 = const()[name = string("op_157_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 1280]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [448, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132774528)))];
+            tensor<fp16, [?, ?]> var_157_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_157_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_157_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_3_cast_fp16 = add(x = var_154_cast_fp16, y = var_157_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<fp16, [32, 1, 448, 1280]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")];
+            tensor<fp16, [32, 1, 448, 1280]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")];
+            tensor<fp16, [32, 1, 1500, 1280]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<int32, [4]> k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")];
+            tensor<fp16, [32, 1, 1500, 1280]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<int32, [4]> v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")];
+            int32 var_180 = const()[name = string("op_180"), val = int32(-1)];
+            tensor<int32, [1]> var_198_axes_0 = const()[name = string("op_198_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133921472)))];
+            tensor<fp16, [1280]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133924096)))];
+            fp16 var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_198_cast_fp16 = layer_norm(axes = var_198_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_198_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_209_to_fp16 = const()[name = string("op_209_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133926720)))];
+            tensor<fp16, [1280]> var_210_to_fp16 = const()[name = string("op_210_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137203584)))];
+            tensor<fp16, [1, ?, 1280]> linear_0_cast_fp16 = linear(bias = var_210_to_fp16, weight = var_209_to_fp16, x = var_198_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137206208)))];
+            tensor<fp16, [1280]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140483072)))];
+            tensor<fp16, [1, ?, 1280]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_213_to_fp16, x = var_198_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140485696)))];
+            tensor<fp16, [1280]> var_218_to_fp16 = const()[name = string("op_218_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143762560)))];
+            tensor<fp16, [1, ?, 1280]> linear_2_cast_fp16 = linear(bias = var_218_to_fp16, weight = var_217_to_fp16, x = var_198_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [3]> var_220_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_220_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_220_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_220_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_220_shape_cast_fp16_to_uint16 = cast(dtype = var_220_shape_cast_fp16_to_uint16_dtype_0, x = var_220_shape_cast_fp16)[name = string("cast_390")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_220_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_389")];
+            int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")];
+            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")];
+            tensor<int32, [1]> expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")];
+            tensor<int32, [1]> concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)];
+            bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")];
+            tensor<int32, [1]> concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)];
+            bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_64_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_64 = read_state(input = k_cache1)[name = string("coreml_update_state_64")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_65_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_65 = read_state(input = v_cache1)[name = string("coreml_update_state_65")];
+            int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)];
+            int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(1280)];
+            int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
+            bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")];
+            tensor<int32, [3]> var_236_begin_0 = const()[name = string("op_236_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_236_end_mask_0 = const()[name = string("op_236_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = concat_10, end_mask = var_236_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_236_cast_fp16")];
+            tensor<int32, [3]> var_239_begin_0 = const()[name = string("op_239_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_239_end_mask_0 = const()[name = string("op_239_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = concat_10, end_mask = var_239_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_239_cast_fp16")];
+            tensor<int32, [4]> concat_12x = const()[name = string("concat_12x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_249_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_249_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_3_cast_fp16 = mul(x = var_249_cast_fp16, y = const_160_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> concat_13x = const()[name = string("concat_13x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_256_cast_fp16 = reshape(shape = concat_13x, x = var_236_cast_fp16)[name = string("op_256_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_5_cast_fp16 = mul(x = var_256_cast_fp16, y = const_161_to_fp16)[name = string("k_5_cast_fp16")];
+            tensor<int32, [4]> concat_14x = const()[name = string("concat_14x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_263_cast_fp16 = reshape(shape = concat_14x, x = var_239_cast_fp16)[name = string("op_263_cast_fp16")];
+            tensor<int32, [4]> var_264 = const()[name = string("op_264"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_257_perm_0 = const()[name = string("transpose_257_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_258_perm_0 = const()[name = string("transpose_258_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_258 = transpose(perm = transpose_258_perm_0, x = k_5_cast_fp16)[name = string("transpose_638")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_257 = transpose(perm = transpose_257_perm_0, x = q_3_cast_fp16)[name = string("transpose_639")];
+            tensor<fp16, [1, 20, ?, ?]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_257, y = transpose_258)[name = string("qk_1_cast_fp16")];
+            int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")];
+            tensor<int32, [2]> var_267_begin_0 = const()[name = string("op_267_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_267_end_mask_0 = const()[name = string("op_267_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 448]> mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor<fp16, [448, 448]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143765184)))];
+            tensor<fp16, [?, 448]> var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = concat_15, end_mask = var_267_end_mask_0, x = mask_to_fp16)[name = string("op_267_cast_fp16")];
+            int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)];
+            int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)];
+            bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")];
+            tensor<int32, [2]> var_268_begin_0 = const()[name = string("op_268_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_268_end_mask_0 = const()[name = string("op_268_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = concat_16, end_mask = var_268_end_mask_0, x = var_267_cast_fp16)[name = string("op_268_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_268_cast_fp16)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_271_cast_fp16 = softmax(axis = var_180, x = qk_3_cast_fp16)[name = string("op_271_cast_fp16")];
+            bool var_273_transpose_x_0 = const()[name = string("op_273_transpose_x_0"), val = bool(false)];
+            bool var_273_transpose_y_0 = const()[name = string("op_273_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_5_cast_fp16 = transpose(perm = var_264, x = var_263_cast_fp16)[name = string("transpose_640")];
+            tensor<fp16, [1, 20, ?, 64]> var_273_cast_fp16 = matmul(transpose_x = var_273_transpose_x_0, transpose_y = var_273_transpose_y_0, x = var_271_cast_fp16, y = v_5_cast_fp16)[name = string("op_273_cast_fp16")];
+            tensor<int32, [4]> var_274 = const()[name = string("op_274"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_17x = const()[name = string("concat_17x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_275_cast_fp16 = transpose(perm = var_274, x = var_273_cast_fp16)[name = string("transpose_637")];
+            tensor<fp16, [1, ?, 1280]> x_7_cast_fp16 = reshape(shape = concat_17x, x = var_275_cast_fp16)[name = string("x_7_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144166656)))];
+            tensor<fp16, [1280]> var_280_to_fp16 = const()[name = string("op_280_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147443520)))];
+            tensor<fp16, [1, ?, 1280]> linear_3_cast_fp16 = linear(bias = var_280_to_fp16, weight = var_279_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")];
+            tensor<int32, [1]> var_287_axes_0 = const()[name = string("op_287_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147446144)))];
+            tensor<fp16, [1280]> blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147448768)))];
+            tensor<fp16, [1, ?, 1280]> var_287_cast_fp16 = layer_norm(axes = var_287_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_287_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_296_to_fp16 = const()[name = string("op_296_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147451392)))];
+            tensor<fp16, [1280]> var_297_to_fp16 = const()[name = string("op_297_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150728256)))];
+            tensor<fp16, [1, ?, 1280]> linear_4_cast_fp16 = linear(bias = var_297_to_fp16, weight = var_296_to_fp16, x = var_287_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor<fp16, [1, 1500, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150730880)))];
+            tensor<fp16, [1, 1500, 1280]> k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_22x = const()[name = string("concat_22x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_317_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_317_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_7_cast_fp16 = mul(x = var_317_cast_fp16, y = const_162_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_323 = const()[name = string("op_323"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_324_cast_fp16 = reshape(shape = var_323, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_324_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_9_cast_fp16 = mul(x = var_324_cast_fp16, y = const_163_to_fp16)[name = string("k_9_cast_fp16")];
+            tensor<int32, [4]> var_330 = const()[name = string("op_330"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_331_cast_fp16 = reshape(shape = var_330, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_331_cast_fp16")];
+            tensor<int32, [4]> var_332 = const()[name = string("op_332"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_259_perm_0 = const()[name = string("transpose_259_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_260_perm_0 = const()[name = string("transpose_260_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_260 = transpose(perm = transpose_260_perm_0, x = k_9_cast_fp16)[name = string("transpose_634")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_259 = transpose(perm = transpose_259_perm_0, x = q_7_cast_fp16)[name = string("transpose_635")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_259, y = transpose_260)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_336_cast_fp16 = softmax(axis = var_180, x = qk_5_cast_fp16)[name = string("op_336_cast_fp16")];
+            bool var_338_transpose_x_0 = const()[name = string("op_338_transpose_x_0"), val = bool(false)];
+            bool var_338_transpose_y_0 = const()[name = string("op_338_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_9_cast_fp16 = transpose(perm = var_332, x = var_331_cast_fp16)[name = string("transpose_636")];
+            tensor<fp16, [1, 20, ?, 64]> var_338_cast_fp16 = matmul(transpose_x = var_338_transpose_x_0, transpose_y = var_338_transpose_y_0, x = var_336_cast_fp16, y = v_9_cast_fp16)[name = string("op_338_cast_fp16")];
+            tensor<int32, [4]> var_339 = const()[name = string("op_339"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_23x = const()[name = string("concat_23x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_340_cast_fp16 = transpose(perm = var_339, x = var_338_cast_fp16)[name = string("transpose_633")];
+            tensor<fp16, [1, ?, 1280]> x_13_cast_fp16 = reshape(shape = concat_23x, x = var_340_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154570944)))];
+            tensor<fp16, [1280]> var_345_to_fp16 = const()[name = string("op_345_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157847808)))];
+            tensor<fp16, [1, ?, 1280]> linear_5_cast_fp16 = linear(bias = var_345_to_fp16, weight = var_344_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")];
+            tensor<int32, [1]> var_352_axes_0 = const()[name = string("op_352_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157850432)))];
+            tensor<fp16, [1280]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157853056)))];
+            tensor<fp16, [1, ?, 1280]> var_352_cast_fp16 = layer_norm(axes = var_352_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_352_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_361_to_fp16 = const()[name = string("op_361_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157855680)))];
+            tensor<fp16, [5120]> var_362_to_fp16 = const()[name = string("op_362_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170962944)))];
+            tensor<fp16, [1, ?, 5120]> linear_6_cast_fp16 = linear(bias = var_362_to_fp16, weight = var_361_to_fp16, x = var_352_cast_fp16)[name = string("linear_6_cast_fp16")];
+            string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170973248)))];
+            tensor<fp16, [1280]> var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184080512)))];
+            tensor<fp16, [1, ?, 1280]> linear_7_cast_fp16 = linear(bias = var_368_to_fp16, weight = var_367_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_64)[name = string("k_cache_5_cast_fp16")];
+            tensor<int32, [4]> v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_65)[name = string("v_cache_5_cast_fp16")];
+            tensor<int32, [4]> k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")];
+            tensor<int32, [4]> v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")];
+            int32 var_391 = const()[name = string("op_391"), val = int32(-1)];
+            tensor<int32, [1]> var_409_axes_0 = const()[name = string("op_409_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184083136)))];
+            tensor<fp16, [1280]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184085760)))];
+            fp16 var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_409_cast_fp16 = layer_norm(axes = var_409_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_409_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184088384)))];
+            tensor<fp16, [1280]> var_421_to_fp16 = const()[name = string("op_421_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187365248)))];
+            tensor<fp16, [1, ?, 1280]> linear_8_cast_fp16 = linear(bias = var_421_to_fp16, weight = var_420_to_fp16, x = var_409_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_424_to_fp16 = const()[name = string("op_424_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187367872)))];
+            tensor<fp16, [1, ?, 1280]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_424_to_fp16, x = var_409_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_428_to_fp16 = const()[name = string("op_428_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190644736)))];
+            tensor<fp16, [1280]> var_429_to_fp16 = const()[name = string("op_429_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193921600)))];
+            tensor<fp16, [1, ?, 1280]> linear_10_cast_fp16 = linear(bias = var_429_to_fp16, weight = var_428_to_fp16, x = var_409_cast_fp16)[name = string("linear_10_cast_fp16")];
+            tensor<int32, [3]> var_431_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_431_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_431_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_431_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_431_shape_cast_fp16_to_uint16 = cast(dtype = var_431_shape_cast_fp16_to_uint16_dtype_0, x = var_431_shape_cast_fp16)[name = string("cast_388")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_431_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_387")];
+            int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")];
+            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")];
+            tensor<int32, [1]> concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor<int32, [1]>([1])];
+            int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
+            bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_64)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_66 = read_state(input = k_cache1)[name = string("coreml_update_state_66")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_65)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_67 = read_state(input = v_cache1)[name = string("coreml_update_state_67")];
+            int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)];
+            int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(1280)];
+            int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)];
+            bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")];
+            tensor<int32, [3]> var_447_begin_0 = const()[name = string("op_447_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_447_end_mask_0 = const()[name = string("op_447_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_447_cast_fp16 = slice_by_index(begin = var_447_begin_0, end = concat_32, end_mask = var_447_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_447_cast_fp16")];
+            tensor<int32, [3]> var_450_begin_0 = const()[name = string("op_450_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_450_end_mask_0 = const()[name = string("op_450_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_450_cast_fp16 = slice_by_index(begin = var_450_begin_0, end = concat_32, end_mask = var_450_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_450_cast_fp16")];
+            tensor<int32, [4]> concat_34x = const()[name = string("concat_34x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_460_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_460_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_11_cast_fp16 = mul(x = var_460_cast_fp16, y = const_164_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> concat_35x = const()[name = string("concat_35x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_467_cast_fp16 = reshape(shape = concat_35x, x = var_447_cast_fp16)[name = string("op_467_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_15_cast_fp16 = mul(x = var_467_cast_fp16, y = const_165_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> concat_36x = const()[name = string("concat_36x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_474_cast_fp16 = reshape(shape = concat_36x, x = var_450_cast_fp16)[name = string("op_474_cast_fp16")];
+            tensor<int32, [4]> var_475 = const()[name = string("op_475"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_261_perm_0 = const()[name = string("transpose_261_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_262_perm_0 = const()[name = string("transpose_262_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_262 = transpose(perm = transpose_262_perm_0, x = k_15_cast_fp16)[name = string("transpose_630")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_261 = transpose(perm = transpose_261_perm_0, x = q_11_cast_fp16)[name = string("transpose_631")];
+            tensor<fp16, [1, 20, ?, ?]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_261, y = transpose_262)[name = string("qk_7_cast_fp16")];
+            int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)];
+            int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)];
+            bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")];
+            tensor<int32, [2]> var_478_begin_0 = const()[name = string("op_478_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_478_end_mask_0 = const()[name = string("op_478_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = concat_37, end_mask = var_478_end_mask_0, x = mask_to_fp16)[name = string("op_478_cast_fp16")];
+            int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)];
+            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
+            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")];
+            tensor<int32, [2]> var_479_begin_0 = const()[name = string("op_479_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_479_end_mask_0 = const()[name = string("op_479_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_479_cast_fp16 = slice_by_index(begin = var_479_begin_0, end = concat_38, end_mask = var_479_end_mask_0, x = var_478_cast_fp16)[name = string("op_479_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_479_cast_fp16)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_482_cast_fp16 = softmax(axis = var_391, x = qk_9_cast_fp16)[name = string("op_482_cast_fp16")];
+            bool var_484_transpose_x_0 = const()[name = string("op_484_transpose_x_0"), val = bool(false)];
+            bool var_484_transpose_y_0 = const()[name = string("op_484_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_15_cast_fp16 = transpose(perm = var_475, x = var_474_cast_fp16)[name = string("transpose_632")];
+            tensor<fp16, [1, 20, ?, 64]> var_484_cast_fp16 = matmul(transpose_x = var_484_transpose_x_0, transpose_y = var_484_transpose_y_0, x = var_482_cast_fp16, y = v_15_cast_fp16)[name = string("op_484_cast_fp16")];
+            tensor<int32, [4]> var_485 = const()[name = string("op_485"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_39x = const()[name = string("concat_39x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_486_cast_fp16 = transpose(perm = var_485, x = var_484_cast_fp16)[name = string("transpose_629")];
+            tensor<fp16, [1, ?, 1280]> x_25_cast_fp16 = reshape(shape = concat_39x, x = var_486_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_490_to_fp16 = const()[name = string("op_490_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193924224)))];
+            tensor<fp16, [1280]> var_491_to_fp16 = const()[name = string("op_491_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197201088)))];
+            tensor<fp16, [1, ?, 1280]> linear_11_cast_fp16 = linear(bias = var_491_to_fp16, weight = var_490_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")];
+            tensor<int32, [1]> var_498_axes_0 = const()[name = string("op_498_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197203712)))];
+            tensor<fp16, [1280]> blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197206336)))];
+            tensor<fp16, [1, ?, 1280]> var_498_cast_fp16 = layer_norm(axes = var_498_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_498_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_507_to_fp16 = const()[name = string("op_507_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197208960)))];
+            tensor<fp16, [1280]> var_508_to_fp16 = const()[name = string("op_508_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200485824)))];
+            tensor<fp16, [1, ?, 1280]> linear_12_cast_fp16 = linear(bias = var_508_to_fp16, weight = var_507_to_fp16, x = var_498_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<int32, [3]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_42 = const()[name = string("concat_42"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_44x = const()[name = string("concat_44x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_528_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_528_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_15_cast_fp16 = mul(x = var_528_cast_fp16, y = const_166_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_534 = const()[name = string("op_534"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_535_cast_fp16 = reshape(shape = var_534, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_535_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_167_to_fp16 = const()[name = string("const_167_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_19_cast_fp16 = mul(x = var_535_cast_fp16, y = const_167_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_541 = const()[name = string("op_541"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_542_cast_fp16 = reshape(shape = var_541, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_542_cast_fp16")];
+            tensor<int32, [4]> var_543 = const()[name = string("op_543"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_263_perm_0 = const()[name = string("transpose_263_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_264_perm_0 = const()[name = string("transpose_264_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_264 = transpose(perm = transpose_264_perm_0, x = k_19_cast_fp16)[name = string("transpose_626")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_263 = transpose(perm = transpose_263_perm_0, x = q_15_cast_fp16)[name = string("transpose_627")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_263, y = transpose_264)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_547_cast_fp16 = softmax(axis = var_391, x = qk_11_cast_fp16)[name = string("op_547_cast_fp16")];
+            bool var_549_transpose_x_0 = const()[name = string("op_549_transpose_x_0"), val = bool(false)];
+            bool var_549_transpose_y_0 = const()[name = string("op_549_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_543, x = var_542_cast_fp16)[name = string("transpose_628")];
+            tensor<fp16, [1, 20, ?, 64]> var_549_cast_fp16 = matmul(transpose_x = var_549_transpose_x_0, transpose_y = var_549_transpose_y_0, x = var_547_cast_fp16, y = v_19_cast_fp16)[name = string("op_549_cast_fp16")];
+            tensor<int32, [4]> var_550 = const()[name = string("op_550"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_45x = const()[name = string("concat_45x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_551_cast_fp16 = transpose(perm = var_550, x = var_549_cast_fp16)[name = string("transpose_625")];
+            tensor<fp16, [1, ?, 1280]> x_31_cast_fp16 = reshape(shape = concat_45x, x = var_551_cast_fp16)[name = string("x_31_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_555_to_fp16 = const()[name = string("op_555_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200488448)))];
+            tensor<fp16, [1280]> var_556_to_fp16 = const()[name = string("op_556_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203765312)))];
+            tensor<fp16, [1, ?, 1280]> linear_13_cast_fp16 = linear(bias = var_556_to_fp16, weight = var_555_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")];
+            tensor<int32, [1]> var_563_axes_0 = const()[name = string("op_563_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203767936)))];
+            tensor<fp16, [1280]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203770560)))];
+            tensor<fp16, [1, ?, 1280]> var_563_cast_fp16 = layer_norm(axes = var_563_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_563_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_572_to_fp16 = const()[name = string("op_572_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203773184)))];
+            tensor<fp16, [5120]> var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216880448)))];
+            tensor<fp16, [1, ?, 5120]> linear_14_cast_fp16 = linear(bias = var_573_to_fp16, weight = var_572_to_fp16, x = var_563_cast_fp16)[name = string("linear_14_cast_fp16")];
+            string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_578_to_fp16 = const()[name = string("op_578_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216890752)))];
+            tensor<fp16, [1280]> var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229998016)))];
+            tensor<fp16, [1, ?, 1280]> linear_15_cast_fp16 = linear(bias = var_579_to_fp16, weight = var_578_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")];
+            tensor<int32, [4]> k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_66)[name = string("k_cache_9_cast_fp16")];
+            tensor<int32, [4]> v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_67)[name = string("v_cache_9_cast_fp16")];
+            tensor<int32, [4]> k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")];
+            tensor<int32, [4]> v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")];
+            int32 var_602 = const()[name = string("op_602"), val = int32(-1)];
+            tensor<int32, [1]> var_620_axes_0 = const()[name = string("op_620_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230000640)))];
+            tensor<fp16, [1280]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230003264)))];
+            fp16 var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_620_cast_fp16 = layer_norm(axes = var_620_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_620_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_631_to_fp16 = const()[name = string("op_631_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230005888)))];
+            tensor<fp16, [1280]> var_632_to_fp16 = const()[name = string("op_632_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233282752)))];
+            tensor<fp16, [1, ?, 1280]> linear_16_cast_fp16 = linear(bias = var_632_to_fp16, weight = var_631_to_fp16, x = var_620_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_635_to_fp16 = const()[name = string("op_635_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233285376)))];
+            tensor<fp16, [1, ?, 1280]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_635_to_fp16, x = var_620_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_639_to_fp16 = const()[name = string("op_639_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236562240)))];
+            tensor<fp16, [1280]> var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239839104)))];
+            tensor<fp16, [1, ?, 1280]> linear_18_cast_fp16 = linear(bias = var_640_to_fp16, weight = var_639_to_fp16, x = var_620_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<int32, [3]> var_642_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_642_shape_cast_fp16")];
+            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
+            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
+            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
+            string var_642_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_642_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_642_shape_cast_fp16_to_uint16 = cast(dtype = var_642_shape_cast_fp16_to_uint16_dtype_0, x = var_642_shape_cast_fp16)[name = string("cast_386")];
+            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_642_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
+            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_385")];
+            int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")];
+            tensor<int32, [1]> expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([2])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")];
+            tensor<int32, [1]> concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)];
+            bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_66)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_68_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_68 = read_state(input = k_cache1)[name = string("coreml_update_state_68")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_67)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_69_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_69 = read_state(input = v_cache1)[name = string("coreml_update_state_69")];
+            int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)];
+            int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(1280)];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")];
+            tensor<int32, [3]> var_658_begin_0 = const()[name = string("op_658_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_658_end_mask_0 = const()[name = string("op_658_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_658_cast_fp16 = slice_by_index(begin = var_658_begin_0, end = concat_54, end_mask = var_658_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_658_cast_fp16")];
+            tensor<int32, [3]> var_661_begin_0 = const()[name = string("op_661_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_661_end_mask_0 = const()[name = string("op_661_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_661_cast_fp16 = slice_by_index(begin = var_661_begin_0, end = concat_54, end_mask = var_661_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_661_cast_fp16")];
+            tensor<int32, [4]> concat_56x = const()[name = string("concat_56x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_671_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_671_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_19_cast_fp16 = mul(x = var_671_cast_fp16, y = const_168_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> concat_57x = const()[name = string("concat_57x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_678_cast_fp16 = reshape(shape = concat_57x, x = var_658_cast_fp16)[name = string("op_678_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_25_cast_fp16 = mul(x = var_678_cast_fp16, y = const_169_to_fp16)[name = string("k_25_cast_fp16")];
+            tensor<int32, [4]> concat_58x = const()[name = string("concat_58x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_685_cast_fp16 = reshape(shape = concat_58x, x = var_661_cast_fp16)[name = string("op_685_cast_fp16")];
+            tensor<int32, [4]> var_686 = const()[name = string("op_686"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_265_perm_0 = const()[name = string("transpose_265_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_266_perm_0 = const()[name = string("transpose_266_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_266 = transpose(perm = transpose_266_perm_0, x = k_25_cast_fp16)[name = string("transpose_622")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_265 = transpose(perm = transpose_265_perm_0, x = q_19_cast_fp16)[name = string("transpose_623")];
+            tensor<fp16, [1, 20, ?, ?]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_265, y = transpose_266)[name = string("qk_13_cast_fp16")];
+            int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)];
+            int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)];
+            bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")];
+            tensor<int32, [2]> var_689_begin_0 = const()[name = string("op_689_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_689_end_mask_0 = const()[name = string("op_689_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_689_cast_fp16 = slice_by_index(begin = var_689_begin_0, end = concat_59, end_mask = var_689_end_mask_0, x = mask_to_fp16)[name = string("op_689_cast_fp16")];
+            int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")];
+            tensor<int32, [2]> var_690_begin_0 = const()[name = string("op_690_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_690_end_mask_0 = const()[name = string("op_690_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_690_cast_fp16 = slice_by_index(begin = var_690_begin_0, end = concat_60, end_mask = var_690_end_mask_0, x = var_689_cast_fp16)[name = string("op_690_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_690_cast_fp16)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_693_cast_fp16 = softmax(axis = var_602, x = qk_15_cast_fp16)[name = string("op_693_cast_fp16")];
+            bool var_695_transpose_x_0 = const()[name = string("op_695_transpose_x_0"), val = bool(false)];
+            bool var_695_transpose_y_0 = const()[name = string("op_695_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_25_cast_fp16 = transpose(perm = var_686, x = var_685_cast_fp16)[name = string("transpose_624")];
+            tensor<fp16, [1, 20, ?, 64]> var_695_cast_fp16 = matmul(transpose_x = var_695_transpose_x_0, transpose_y = var_695_transpose_y_0, x = var_693_cast_fp16, y = v_25_cast_fp16)[name = string("op_695_cast_fp16")];
+            tensor<int32, [4]> var_696 = const()[name = string("op_696"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_61x = const()[name = string("concat_61x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_697_cast_fp16 = transpose(perm = var_696, x = var_695_cast_fp16)[name = string("transpose_621")];
+            tensor<fp16, [1, ?, 1280]> x_43_cast_fp16 = reshape(shape = concat_61x, x = var_697_cast_fp16)[name = string("x_43_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_701_to_fp16 = const()[name = string("op_701_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239841728)))];
+            tensor<fp16, [1280]> var_702_to_fp16 = const()[name = string("op_702_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243118592)))];
+            tensor<fp16, [1, ?, 1280]> linear_19_cast_fp16 = linear(bias = var_702_to_fp16, weight = var_701_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")];
+            tensor<int32, [1]> var_709_axes_0 = const()[name = string("op_709_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243121216)))];
+            tensor<fp16, [1280]> blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243123840)))];
+            tensor<fp16, [1, ?, 1280]> var_709_cast_fp16 = layer_norm(axes = var_709_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_709_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_718_to_fp16 = const()[name = string("op_718_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243126464)))];
+            tensor<fp16, [1280]> var_719_to_fp16 = const()[name = string("op_719_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246403328)))];
+            tensor<fp16, [1, ?, 1280]> linear_20_cast_fp16 = linear(bias = var_719_to_fp16, weight = var_718_to_fp16, x = var_709_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [3]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_63 = const()[name = string("concat_63"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_64 = const()[name = string("concat_64"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_66x = const()[name = string("concat_66x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_739_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_739_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_23_cast_fp16 = mul(x = var_739_cast_fp16, y = const_170_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_745 = const()[name = string("op_745"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_746_cast_fp16 = reshape(shape = var_745, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_746_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_29_cast_fp16 = mul(x = var_746_cast_fp16, y = const_171_to_fp16)[name = string("k_29_cast_fp16")];
+            tensor<int32, [4]> var_752 = const()[name = string("op_752"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_753_cast_fp16 = reshape(shape = var_752, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_753_cast_fp16")];
+            tensor<int32, [4]> var_754 = const()[name = string("op_754"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_267_perm_0 = const()[name = string("transpose_267_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_268_perm_0 = const()[name = string("transpose_268_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_268 = transpose(perm = transpose_268_perm_0, x = k_29_cast_fp16)[name = string("transpose_618")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_267 = transpose(perm = transpose_267_perm_0, x = q_23_cast_fp16)[name = string("transpose_619")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_267, y = transpose_268)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_758_cast_fp16 = softmax(axis = var_602, x = qk_17_cast_fp16)[name = string("op_758_cast_fp16")];
+            bool var_760_transpose_x_0 = const()[name = string("op_760_transpose_x_0"), val = bool(false)];
+            bool var_760_transpose_y_0 = const()[name = string("op_760_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_29_cast_fp16 = transpose(perm = var_754, x = var_753_cast_fp16)[name = string("transpose_620")];
+            tensor<fp16, [1, 20, ?, 64]> var_760_cast_fp16 = matmul(transpose_x = var_760_transpose_x_0, transpose_y = var_760_transpose_y_0, x = var_758_cast_fp16, y = v_29_cast_fp16)[name = string("op_760_cast_fp16")];
+            tensor<int32, [4]> var_761 = const()[name = string("op_761"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_67x = const()[name = string("concat_67x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_762_cast_fp16 = transpose(perm = var_761, x = var_760_cast_fp16)[name = string("transpose_617")];
+            tensor<fp16, [1, ?, 1280]> x_49_cast_fp16 = reshape(shape = concat_67x, x = var_762_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246405952)))];
+            tensor<fp16, [1280]> var_767_to_fp16 = const()[name = string("op_767_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249682816)))];
+            tensor<fp16, [1, ?, 1280]> linear_21_cast_fp16 = linear(bias = var_767_to_fp16, weight = var_766_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")];
+            tensor<int32, [1]> var_774_axes_0 = const()[name = string("op_774_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249685440)))];
+            tensor<fp16, [1280]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249688064)))];
+            tensor<fp16, [1, ?, 1280]> var_774_cast_fp16 = layer_norm(axes = var_774_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_774_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_783_to_fp16 = const()[name = string("op_783_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249690688)))];
+            tensor<fp16, [5120]> var_784_to_fp16 = const()[name = string("op_784_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262797952)))];
+            tensor<fp16, [1, ?, 5120]> linear_22_cast_fp16 = linear(bias = var_784_to_fp16, weight = var_783_to_fp16, x = var_774_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_789_to_fp16 = const()[name = string("op_789_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262808256)))];
+            tensor<fp16, [1280]> var_790_to_fp16 = const()[name = string("op_790_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275915520)))];
+            tensor<fp16, [1, ?, 1280]> linear_23_cast_fp16 = linear(bias = var_790_to_fp16, weight = var_789_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")];
+            tensor<int32, [4]> k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_68)[name = string("k_cache_13_cast_fp16")];
+            tensor<int32, [4]> v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_69)[name = string("v_cache_13_cast_fp16")];
+            tensor<int32, [4]> k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")];
+            tensor<int32, [4]> v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")];
+            int32 var_813 = const()[name = string("op_813"), val = int32(-1)];
+            tensor<int32, [1]> var_831_axes_0 = const()[name = string("op_831_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275918144)))];
+            tensor<fp16, [1280]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275920768)))];
+            fp16 var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_831_cast_fp16 = layer_norm(axes = var_831_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_831_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_842_to_fp16 = const()[name = string("op_842_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275923392)))];
+            tensor<fp16, [1280]> var_843_to_fp16 = const()[name = string("op_843_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279200256)))];
+            tensor<fp16, [1, ?, 1280]> linear_24_cast_fp16 = linear(bias = var_843_to_fp16, weight = var_842_to_fp16, x = var_831_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_846_to_fp16 = const()[name = string("op_846_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279202880)))];
+            tensor<fp16, [1, ?, 1280]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_846_to_fp16, x = var_831_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282479744)))];
+            tensor<fp16, [1280]> var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285756608)))];
+            tensor<fp16, [1, ?, 1280]> linear_26_cast_fp16 = linear(bias = var_851_to_fp16, weight = var_850_to_fp16, x = var_831_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [3]> var_853_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_853_shape_cast_fp16")];
+            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
+            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
+            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
+            string var_853_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_853_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_853_shape_cast_fp16_to_uint16 = cast(dtype = var_853_shape_cast_fp16_to_uint16_dtype_0, x = var_853_shape_cast_fp16)[name = string("cast_384")];
+            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_853_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
+            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_383")];
+            int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")];
+            tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")];
+            tensor<int32, [1]> concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor<int32, [1]>([3])];
+            int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)];
+            bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")];
+            tensor<int32, [1]> concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)];
+            bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_68)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_70_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_70 = read_state(input = k_cache1)[name = string("coreml_update_state_70")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_69)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_71_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_71 = read_state(input = v_cache1)[name = string("coreml_update_state_71")];
+            int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)];
+            int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(1280)];
+            int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)];
+            bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")];
+            tensor<int32, [3]> var_869_begin_0 = const()[name = string("op_869_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_869_end_mask_0 = const()[name = string("op_869_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = concat_76, end_mask = var_869_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_869_cast_fp16")];
+            tensor<int32, [3]> var_872_begin_0 = const()[name = string("op_872_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_872_end_mask_0 = const()[name = string("op_872_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = concat_76, end_mask = var_872_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_872_cast_fp16")];
+            tensor<int32, [4]> concat_78x = const()[name = string("concat_78x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_882_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_882_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_27_cast_fp16 = mul(x = var_882_cast_fp16, y = const_172_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> concat_79x = const()[name = string("concat_79x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_889_cast_fp16 = reshape(shape = concat_79x, x = var_869_cast_fp16)[name = string("op_889_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_35_cast_fp16 = mul(x = var_889_cast_fp16, y = const_173_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> concat_80x = const()[name = string("concat_80x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_896_cast_fp16 = reshape(shape = concat_80x, x = var_872_cast_fp16)[name = string("op_896_cast_fp16")];
+            tensor<int32, [4]> var_897 = const()[name = string("op_897"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_269_perm_0 = const()[name = string("transpose_269_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_270_perm_0 = const()[name = string("transpose_270_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_270 = transpose(perm = transpose_270_perm_0, x = k_35_cast_fp16)[name = string("transpose_614")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_269 = transpose(perm = transpose_269_perm_0, x = q_27_cast_fp16)[name = string("transpose_615")];
+            tensor<fp16, [1, 20, ?, ?]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_269, y = transpose_270)[name = string("qk_19_cast_fp16")];
+            int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)];
+            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
+            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")];
+            tensor<int32, [2]> var_900_begin_0 = const()[name = string("op_900_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_900_end_mask_0 = const()[name = string("op_900_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_900_cast_fp16 = slice_by_index(begin = var_900_begin_0, end = concat_81, end_mask = var_900_end_mask_0, x = mask_to_fp16)[name = string("op_900_cast_fp16")];
+            int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)];
+            int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)];
+            bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")];
+            tensor<int32, [2]> var_901_begin_0 = const()[name = string("op_901_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_901_end_mask_0 = const()[name = string("op_901_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_901_cast_fp16 = slice_by_index(begin = var_901_begin_0, end = concat_82, end_mask = var_901_end_mask_0, x = var_900_cast_fp16)[name = string("op_901_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_901_cast_fp16)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_904_cast_fp16 = softmax(axis = var_813, x = qk_21_cast_fp16)[name = string("op_904_cast_fp16")];
+            bool var_906_transpose_x_0 = const()[name = string("op_906_transpose_x_0"), val = bool(false)];
+            bool var_906_transpose_y_0 = const()[name = string("op_906_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_35_cast_fp16 = transpose(perm = var_897, x = var_896_cast_fp16)[name = string("transpose_616")];
+            tensor<fp16, [1, 20, ?, 64]> var_906_cast_fp16 = matmul(transpose_x = var_906_transpose_x_0, transpose_y = var_906_transpose_y_0, x = var_904_cast_fp16, y = v_35_cast_fp16)[name = string("op_906_cast_fp16")];
+            tensor<int32, [4]> var_907 = const()[name = string("op_907"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_83x = const()[name = string("concat_83x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_908_cast_fp16 = transpose(perm = var_907, x = var_906_cast_fp16)[name = string("transpose_613")];
+            tensor<fp16, [1, ?, 1280]> x_61_cast_fp16 = reshape(shape = concat_83x, x = var_908_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285759232)))];
+            tensor<fp16, [1280]> var_913_to_fp16 = const()[name = string("op_913_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289036096)))];
+            tensor<fp16, [1, ?, 1280]> linear_27_cast_fp16 = linear(bias = var_913_to_fp16, weight = var_912_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")];
+            tensor<int32, [1]> var_920_axes_0 = const()[name = string("op_920_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289038720)))];
+            tensor<fp16, [1280]> blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289041344)))];
+            tensor<fp16, [1, ?, 1280]> var_920_cast_fp16 = layer_norm(axes = var_920_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_920_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_929_to_fp16 = const()[name = string("op_929_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289043968)))];
+            tensor<fp16, [1280]> var_930_to_fp16 = const()[name = string("op_930_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292320832)))];
+            tensor<fp16, [1, ?, 1280]> linear_28_cast_fp16 = linear(bias = var_930_to_fp16, weight = var_929_to_fp16, x = var_920_cast_fp16)[name = string("linear_28_cast_fp16")];
+            tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_87 = const()[name = string("concat_87"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_88x = const()[name = string("concat_88x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_950_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_950_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_31_cast_fp16 = mul(x = var_950_cast_fp16, y = const_174_to_fp16)[name = string("q_31_cast_fp16")];
+            tensor<int32, [4]> var_956 = const()[name = string("op_956"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_957_cast_fp16 = reshape(shape = var_956, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_957_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_39_cast_fp16 = mul(x = var_957_cast_fp16, y = const_175_to_fp16)[name = string("k_39_cast_fp16")];
+            tensor<int32, [4]> var_963 = const()[name = string("op_963"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_964_cast_fp16 = reshape(shape = var_963, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_964_cast_fp16")];
+            tensor<int32, [4]> var_965 = const()[name = string("op_965"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)];
+            bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_271_perm_0 = const()[name = string("transpose_271_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_272_perm_0 = const()[name = string("transpose_272_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_272 = transpose(perm = transpose_272_perm_0, x = k_39_cast_fp16)[name = string("transpose_610")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_271 = transpose(perm = transpose_271_perm_0, x = q_31_cast_fp16)[name = string("transpose_611")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_271, y = transpose_272)[name = string("qk_23_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_969_cast_fp16 = softmax(axis = var_813, x = qk_23_cast_fp16)[name = string("op_969_cast_fp16")];
+            bool var_971_transpose_x_0 = const()[name = string("op_971_transpose_x_0"), val = bool(false)];
+            bool var_971_transpose_y_0 = const()[name = string("op_971_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_965, x = var_964_cast_fp16)[name = string("transpose_612")];
+            tensor<fp16, [1, 20, ?, 64]> var_971_cast_fp16 = matmul(transpose_x = var_971_transpose_x_0, transpose_y = var_971_transpose_y_0, x = var_969_cast_fp16, y = v_39_cast_fp16)[name = string("op_971_cast_fp16")];
+            tensor<int32, [4]> var_972 = const()[name = string("op_972"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_89x = const()[name = string("concat_89x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_973_cast_fp16 = transpose(perm = var_972, x = var_971_cast_fp16)[name = string("transpose_609")];
+            tensor<fp16, [1, ?, 1280]> x_67_cast_fp16 = reshape(shape = concat_89x, x = var_973_cast_fp16)[name = string("x_67_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_977_to_fp16 = const()[name = string("op_977_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292323456)))];
+            tensor<fp16, [1280]> var_978_to_fp16 = const()[name = string("op_978_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295600320)))];
+            tensor<fp16, [1, ?, 1280]> linear_29_cast_fp16 = linear(bias = var_978_to_fp16, weight = var_977_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")];
+            tensor<int32, [1]> var_985_axes_0 = const()[name = string("op_985_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295602944)))];
+            tensor<fp16, [1280]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295605568)))];
+            tensor<fp16, [1, ?, 1280]> var_985_cast_fp16 = layer_norm(axes = var_985_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_985_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_994_to_fp16 = const()[name = string("op_994_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295608192)))];
+            tensor<fp16, [5120]> var_995_to_fp16 = const()[name = string("op_995_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308715456)))];
+            tensor<fp16, [1, ?, 5120]> linear_30_cast_fp16 = linear(bias = var_995_to_fp16, weight = var_994_to_fp16, x = var_985_cast_fp16)[name = string("linear_30_cast_fp16")];
+            string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1000_to_fp16 = const()[name = string("op_1000_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308725760)))];
+            tensor<fp16, [1280]> var_1001_to_fp16 = const()[name = string("op_1001_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321833024)))];
+            tensor<fp16, [1, ?, 1280]> linear_31_cast_fp16 = linear(bias = var_1001_to_fp16, weight = var_1000_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")];
+            tensor<int32, [4]> k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_70)[name = string("k_cache_17_cast_fp16")];
+            tensor<int32, [4]> v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_71)[name = string("v_cache_17_cast_fp16")];
+            tensor<int32, [4]> k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")];
+            tensor<int32, [4]> v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")];
+            int32 var_1024 = const()[name = string("op_1024"), val = int32(-1)];
+            tensor<int32, [1]> var_1042_axes_0 = const()[name = string("op_1042_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321835648)))];
+            tensor<fp16, [1280]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321838272)))];
+            fp16 var_1030_to_fp16 = const()[name = string("op_1030_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1042_cast_fp16 = layer_norm(axes = var_1042_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_1042_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1053_to_fp16 = const()[name = string("op_1053_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321840896)))];
+            tensor<fp16, [1280]> var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325117760)))];
+            tensor<fp16, [1, ?, 1280]> linear_32_cast_fp16 = linear(bias = var_1054_to_fp16, weight = var_1053_to_fp16, x = var_1042_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1057_to_fp16 = const()[name = string("op_1057_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325120384)))];
+            tensor<fp16, [1, ?, 1280]> linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1057_to_fp16, x = var_1042_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328397248)))];
+            tensor<fp16, [1280]> var_1062_to_fp16 = const()[name = string("op_1062_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331674112)))];
+            tensor<fp16, [1, ?, 1280]> linear_34_cast_fp16 = linear(bias = var_1062_to_fp16, weight = var_1061_to_fp16, x = var_1042_cast_fp16)[name = string("linear_34_cast_fp16")];
+            tensor<int32, [3]> var_1064_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_1064_shape_cast_fp16")];
+            int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)];
+            int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)];
+            bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)];
+            string var_1064_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1064_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1064_shape_cast_fp16_to_uint16 = cast(dtype = var_1064_shape_cast_fp16_to_uint16_dtype_0, x = var_1064_shape_cast_fp16)[name = string("cast_382")];
+            uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_1064_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")];
+            string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_381")];
+            int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")];
+            tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")];
+            tensor<int32, [1]> concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor<int32, [1]>([4])];
+            int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)];
+            bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")];
+            tensor<int32, [1]> concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
+            bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_70)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_72_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_72 = read_state(input = k_cache1)[name = string("coreml_update_state_72")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_71)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_73_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_73 = read_state(input = v_cache1)[name = string("coreml_update_state_73")];
+            int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)];
+            int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(1280)];
+            int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)];
+            bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")];
+            tensor<int32, [3]> var_1080_begin_0 = const()[name = string("op_1080_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1080_end_mask_0 = const()[name = string("op_1080_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1080_cast_fp16 = slice_by_index(begin = var_1080_begin_0, end = concat_98, end_mask = var_1080_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1080_cast_fp16")];
+            tensor<int32, [3]> var_1083_begin_0 = const()[name = string("op_1083_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1083_end_mask_0 = const()[name = string("op_1083_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1083_cast_fp16 = slice_by_index(begin = var_1083_begin_0, end = concat_98, end_mask = var_1083_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1083_cast_fp16")];
+            tensor<int32, [4]> concat_100x = const()[name = string("concat_100x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1093_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1093_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_35_cast_fp16 = mul(x = var_1093_cast_fp16, y = const_176_to_fp16)[name = string("q_35_cast_fp16")];
+            tensor<int32, [4]> concat_101x = const()[name = string("concat_101x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1100_cast_fp16 = reshape(shape = concat_101x, x = var_1080_cast_fp16)[name = string("op_1100_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_45_cast_fp16 = mul(x = var_1100_cast_fp16, y = const_177_to_fp16)[name = string("k_45_cast_fp16")];
+            tensor<int32, [4]> concat_102x = const()[name = string("concat_102x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1107_cast_fp16 = reshape(shape = concat_102x, x = var_1083_cast_fp16)[name = string("op_1107_cast_fp16")];
+            tensor<int32, [4]> var_1108 = const()[name = string("op_1108"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)];
+            bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_273_perm_0 = const()[name = string("transpose_273_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_274_perm_0 = const()[name = string("transpose_274_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_274 = transpose(perm = transpose_274_perm_0, x = k_45_cast_fp16)[name = string("transpose_606")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_273 = transpose(perm = transpose_273_perm_0, x = q_35_cast_fp16)[name = string("transpose_607")];
+            tensor<fp16, [1, 20, ?, ?]> qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_273, y = transpose_274)[name = string("qk_25_cast_fp16")];
+            int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)];
+            int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)];
+            bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")];
+            tensor<int32, [2]> var_1111_begin_0 = const()[name = string("op_1111_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1111_end_mask_0 = const()[name = string("op_1111_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1111_cast_fp16 = slice_by_index(begin = var_1111_begin_0, end = concat_103, end_mask = var_1111_end_mask_0, x = mask_to_fp16)[name = string("op_1111_cast_fp16")];
+            int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)];
+            int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)];
+            bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")];
+            tensor<int32, [2]> var_1112_begin_0 = const()[name = string("op_1112_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1112_end_mask_0 = const()[name = string("op_1112_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = concat_104, end_mask = var_1112_end_mask_0, x = var_1111_cast_fp16)[name = string("op_1112_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1112_cast_fp16)[name = string("qk_27_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1115_cast_fp16 = softmax(axis = var_1024, x = qk_27_cast_fp16)[name = string("op_1115_cast_fp16")];
+            bool var_1117_transpose_x_0 = const()[name = string("op_1117_transpose_x_0"), val = bool(false)];
+            bool var_1117_transpose_y_0 = const()[name = string("op_1117_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_45_cast_fp16 = transpose(perm = var_1108, x = var_1107_cast_fp16)[name = string("transpose_608")];
+            tensor<fp16, [1, 20, ?, 64]> var_1117_cast_fp16 = matmul(transpose_x = var_1117_transpose_x_0, transpose_y = var_1117_transpose_y_0, x = var_1115_cast_fp16, y = v_45_cast_fp16)[name = string("op_1117_cast_fp16")];
+            tensor<int32, [4]> var_1118 = const()[name = string("op_1118"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_105x = const()[name = string("concat_105x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1119_cast_fp16 = transpose(perm = var_1118, x = var_1117_cast_fp16)[name = string("transpose_605")];
+            tensor<fp16, [1, ?, 1280]> x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1119_cast_fp16)[name = string("x_79_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1123_to_fp16 = const()[name = string("op_1123_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331676736)))];
+            tensor<fp16, [1280]> var_1124_to_fp16 = const()[name = string("op_1124_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334953600)))];
+            tensor<fp16, [1, ?, 1280]> linear_35_cast_fp16 = linear(bias = var_1124_to_fp16, weight = var_1123_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")];
+            tensor<int32, [1]> var_1131_axes_0 = const()[name = string("op_1131_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334956224)))];
+            tensor<fp16, [1280]> blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334958848)))];
+            tensor<fp16, [1, ?, 1280]> var_1131_cast_fp16 = layer_norm(axes = var_1131_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1131_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1140_to_fp16 = const()[name = string("op_1140_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334961472)))];
+            tensor<fp16, [1280]> var_1141_to_fp16 = const()[name = string("op_1141_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338238336)))];
+            tensor<fp16, [1, ?, 1280]> linear_36_cast_fp16 = linear(bias = var_1141_to_fp16, weight = var_1140_to_fp16, x = var_1131_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<int32, [3]> concat_106 = const()[name = string("concat_106"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_108 = const()[name = string("concat_108"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_109 = const()[name = string("concat_109"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_110x = const()[name = string("concat_110x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1161_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1161_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_39_cast_fp16 = mul(x = var_1161_cast_fp16, y = const_178_to_fp16)[name = string("q_39_cast_fp16")];
+            tensor<int32, [4]> var_1167 = const()[name = string("op_1167"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1168_cast_fp16 = reshape(shape = var_1167, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1168_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_49_cast_fp16 = mul(x = var_1168_cast_fp16, y = const_179_to_fp16)[name = string("k_49_cast_fp16")];
+            tensor<int32, [4]> var_1174 = const()[name = string("op_1174"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1175_cast_fp16 = reshape(shape = var_1174, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1175_cast_fp16")];
+            tensor<int32, [4]> var_1176 = const()[name = string("op_1176"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)];
+            bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_275_perm_0 = const()[name = string("transpose_275_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_276_perm_0 = const()[name = string("transpose_276_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_276 = transpose(perm = transpose_276_perm_0, x = k_49_cast_fp16)[name = string("transpose_602")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_275 = transpose(perm = transpose_275_perm_0, x = q_39_cast_fp16)[name = string("transpose_603")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_275, y = transpose_276)[name = string("qk_29_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_1180_cast_fp16 = softmax(axis = var_1024, x = qk_29_cast_fp16)[name = string("op_1180_cast_fp16")];
+            bool var_1182_transpose_x_0 = const()[name = string("op_1182_transpose_x_0"), val = bool(false)];
+            bool var_1182_transpose_y_0 = const()[name = string("op_1182_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_49_cast_fp16 = transpose(perm = var_1176, x = var_1175_cast_fp16)[name = string("transpose_604")];
+            tensor<fp16, [1, 20, ?, 64]> var_1182_cast_fp16 = matmul(transpose_x = var_1182_transpose_x_0, transpose_y = var_1182_transpose_y_0, x = var_1180_cast_fp16, y = v_49_cast_fp16)[name = string("op_1182_cast_fp16")];
+            tensor<int32, [4]> var_1183 = const()[name = string("op_1183"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_111x = const()[name = string("concat_111x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1184_cast_fp16 = transpose(perm = var_1183, x = var_1182_cast_fp16)[name = string("transpose_601")];
+            tensor<fp16, [1, ?, 1280]> x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1184_cast_fp16)[name = string("x_85_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338240960)))];
+            tensor<fp16, [1280]> var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341517824)))];
+            tensor<fp16, [1, ?, 1280]> linear_37_cast_fp16 = linear(bias = var_1189_to_fp16, weight = var_1188_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")];
+            tensor<int32, [1]> var_1196_axes_0 = const()[name = string("op_1196_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341520448)))];
+            tensor<fp16, [1280]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341523072)))];
+            tensor<fp16, [1, ?, 1280]> var_1196_cast_fp16 = layer_norm(axes = var_1196_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1196_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1205_to_fp16 = const()[name = string("op_1205_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341525696)))];
+            tensor<fp16, [5120]> var_1206_to_fp16 = const()[name = string("op_1206_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354632960)))];
+            tensor<fp16, [1, ?, 5120]> linear_38_cast_fp16 = linear(bias = var_1206_to_fp16, weight = var_1205_to_fp16, x = var_1196_cast_fp16)[name = string("linear_38_cast_fp16")];
+            string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354643264)))];
+            tensor<fp16, [1280]> var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367750528)))];
+            tensor<fp16, [1, ?, 1280]> linear_39_cast_fp16 = linear(bias = var_1212_to_fp16, weight = var_1211_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")];
+            tensor<int32, [4]> k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_72)[name = string("k_cache_21_cast_fp16")];
+            tensor<int32, [4]> v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_73)[name = string("v_cache_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")];
+            tensor<int32, [4]> v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")];
+            int32 var_1235 = const()[name = string("op_1235"), val = int32(-1)];
+            tensor<int32, [1]> var_1253_axes_0 = const()[name = string("op_1253_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367753152)))];
+            tensor<fp16, [1280]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367755776)))];
+            fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1253_cast_fp16 = layer_norm(axes = var_1253_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1253_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1264_to_fp16 = const()[name = string("op_1264_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367758400)))];
+            tensor<fp16, [1280]> var_1265_to_fp16 = const()[name = string("op_1265_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371035264)))];
+            tensor<fp16, [1, ?, 1280]> linear_40_cast_fp16 = linear(bias = var_1265_to_fp16, weight = var_1264_to_fp16, x = var_1253_cast_fp16)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1268_to_fp16 = const()[name = string("op_1268_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371037888)))];
+            tensor<fp16, [1, ?, 1280]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1268_to_fp16, x = var_1253_cast_fp16)[name = string("linear_41_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1272_to_fp16 = const()[name = string("op_1272_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374314752)))];
+            tensor<fp16, [1280]> var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377591616)))];
+            tensor<fp16, [1, ?, 1280]> linear_42_cast_fp16 = linear(bias = var_1273_to_fp16, weight = var_1272_to_fp16, x = var_1253_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<int32, [3]> var_1275_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1275_shape_cast_fp16")];
+            int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)];
+            int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)];
+            bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)];
+            string var_1275_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1275_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1275_shape_cast_fp16_to_uint16 = cast(dtype = var_1275_shape_cast_fp16_to_uint16_dtype_0, x = var_1275_shape_cast_fp16)[name = string("cast_380")];
+            uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1275_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")];
+            string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_379")];
+            int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")];
+            tensor<int32, [1]> expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")];
+            tensor<int32, [1]> concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor<int32, [1]>([5])];
+            int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
+            bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")];
+            tensor<int32, [1]> concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)];
+            bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_72)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_74_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_74 = read_state(input = k_cache1)[name = string("coreml_update_state_74")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_73)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_75_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_75 = read_state(input = v_cache1)[name = string("coreml_update_state_75")];
+            int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)];
+            int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(1280)];
+            int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)];
+            bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")];
+            tensor<int32, [3]> var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = concat_120, end_mask = var_1291_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1291_cast_fp16")];
+            tensor<int32, [3]> var_1294_begin_0 = const()[name = string("op_1294_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1294_end_mask_0 = const()[name = string("op_1294_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1294_cast_fp16 = slice_by_index(begin = var_1294_begin_0, end = concat_120, end_mask = var_1294_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1294_cast_fp16")];
+            tensor<int32, [4]> concat_122x = const()[name = string("concat_122x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1304_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1304_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_43_cast_fp16 = mul(x = var_1304_cast_fp16, y = const_180_to_fp16)[name = string("q_43_cast_fp16")];
+            tensor<int32, [4]> concat_123x = const()[name = string("concat_123x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1311_cast_fp16 = reshape(shape = concat_123x, x = var_1291_cast_fp16)[name = string("op_1311_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_55_cast_fp16 = mul(x = var_1311_cast_fp16, y = const_181_to_fp16)[name = string("k_55_cast_fp16")];
+            tensor<int32, [4]> concat_124x = const()[name = string("concat_124x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1318_cast_fp16 = reshape(shape = concat_124x, x = var_1294_cast_fp16)[name = string("op_1318_cast_fp16")];
+            tensor<int32, [4]> var_1319 = const()[name = string("op_1319"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)];
+            bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_277_perm_0 = const()[name = string("transpose_277_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_278_perm_0 = const()[name = string("transpose_278_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_278 = transpose(perm = transpose_278_perm_0, x = k_55_cast_fp16)[name = string("transpose_598")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_277 = transpose(perm = transpose_277_perm_0, x = q_43_cast_fp16)[name = string("transpose_599")];
+            tensor<fp16, [1, 20, ?, ?]> qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_277, y = transpose_278)[name = string("qk_31_cast_fp16")];
+            int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)];
+            int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)];
+            bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")];
+            tensor<int32, [2]> var_1322_begin_0 = const()[name = string("op_1322_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1322_end_mask_0 = const()[name = string("op_1322_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1322_cast_fp16 = slice_by_index(begin = var_1322_begin_0, end = concat_125, end_mask = var_1322_end_mask_0, x = mask_to_fp16)[name = string("op_1322_cast_fp16")];
+            int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)];
+            int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)];
+            bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")];
+            tensor<int32, [2]> var_1323_begin_0 = const()[name = string("op_1323_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1323_end_mask_0 = const()[name = string("op_1323_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1323_cast_fp16 = slice_by_index(begin = var_1323_begin_0, end = concat_126, end_mask = var_1323_end_mask_0, x = var_1322_cast_fp16)[name = string("op_1323_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1323_cast_fp16)[name = string("qk_33_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1326_cast_fp16 = softmax(axis = var_1235, x = qk_33_cast_fp16)[name = string("op_1326_cast_fp16")];
+            bool var_1328_transpose_x_0 = const()[name = string("op_1328_transpose_x_0"), val = bool(false)];
+            bool var_1328_transpose_y_0 = const()[name = string("op_1328_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_55_cast_fp16 = transpose(perm = var_1319, x = var_1318_cast_fp16)[name = string("transpose_600")];
+            tensor<fp16, [1, 20, ?, 64]> var_1328_cast_fp16 = matmul(transpose_x = var_1328_transpose_x_0, transpose_y = var_1328_transpose_y_0, x = var_1326_cast_fp16, y = v_55_cast_fp16)[name = string("op_1328_cast_fp16")];
+            tensor<int32, [4]> var_1329 = const()[name = string("op_1329"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_127x = const()[name = string("concat_127x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1330_cast_fp16 = transpose(perm = var_1329, x = var_1328_cast_fp16)[name = string("transpose_597")];
+            tensor<fp16, [1, ?, 1280]> x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1330_cast_fp16)[name = string("x_97_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1334_to_fp16 = const()[name = string("op_1334_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377594240)))];
+            tensor<fp16, [1280]> var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380871104)))];
+            tensor<fp16, [1, ?, 1280]> linear_43_cast_fp16 = linear(bias = var_1335_to_fp16, weight = var_1334_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")];
+            tensor<int32, [1]> var_1342_axes_0 = const()[name = string("op_1342_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380873728)))];
+            tensor<fp16, [1280]> blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380876352)))];
+            tensor<fp16, [1, ?, 1280]> var_1342_cast_fp16 = layer_norm(axes = var_1342_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1342_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1351_to_fp16 = const()[name = string("op_1351_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380878976)))];
+            tensor<fp16, [1280]> var_1352_to_fp16 = const()[name = string("op_1352_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384155840)))];
+            tensor<fp16, [1, ?, 1280]> linear_44_cast_fp16 = linear(bias = var_1352_to_fp16, weight = var_1351_to_fp16, x = var_1342_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [3]> concat_128 = const()[name = string("concat_128"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_129 = const()[name = string("concat_129"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_130 = const()[name = string("concat_130"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_131 = const()[name = string("concat_131"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_132x = const()[name = string("concat_132x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1372_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1372_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_47_cast_fp16 = mul(x = var_1372_cast_fp16, y = const_182_to_fp16)[name = string("q_47_cast_fp16")];
+            tensor<int32, [4]> var_1378 = const()[name = string("op_1378"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1379_cast_fp16 = reshape(shape = var_1378, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1379_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_59_cast_fp16 = mul(x = var_1379_cast_fp16, y = const_183_to_fp16)[name = string("k_59_cast_fp16")];
+            tensor<int32, [4]> var_1385 = const()[name = string("op_1385"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1386_cast_fp16 = reshape(shape = var_1385, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1386_cast_fp16")];
+            tensor<int32, [4]> var_1387 = const()[name = string("op_1387"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)];
+            bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_279_perm_0 = const()[name = string("transpose_279_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_280_perm_0 = const()[name = string("transpose_280_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_280 = transpose(perm = transpose_280_perm_0, x = k_59_cast_fp16)[name = string("transpose_594")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_279 = transpose(perm = transpose_279_perm_0, x = q_47_cast_fp16)[name = string("transpose_595")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_279, y = transpose_280)[name = string("qk_35_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_1391_cast_fp16 = softmax(axis = var_1235, x = qk_35_cast_fp16)[name = string("op_1391_cast_fp16")];
+            bool var_1393_transpose_x_0 = const()[name = string("op_1393_transpose_x_0"), val = bool(false)];
+            bool var_1393_transpose_y_0 = const()[name = string("op_1393_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_59_cast_fp16 = transpose(perm = var_1387, x = var_1386_cast_fp16)[name = string("transpose_596")];
+            tensor<fp16, [1, 20, ?, 64]> var_1393_cast_fp16 = matmul(transpose_x = var_1393_transpose_x_0, transpose_y = var_1393_transpose_y_0, x = var_1391_cast_fp16, y = v_59_cast_fp16)[name = string("op_1393_cast_fp16")];
+            tensor<int32, [4]> var_1394 = const()[name = string("op_1394"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_133x = const()[name = string("concat_133x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1395_cast_fp16 = transpose(perm = var_1394, x = var_1393_cast_fp16)[name = string("transpose_593")];
+            tensor<fp16, [1, ?, 1280]> x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1395_cast_fp16)[name = string("x_103_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1399_to_fp16 = const()[name = string("op_1399_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384158464)))];
+            tensor<fp16, [1280]> var_1400_to_fp16 = const()[name = string("op_1400_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387435328)))];
+            tensor<fp16, [1, ?, 1280]> linear_45_cast_fp16 = linear(bias = var_1400_to_fp16, weight = var_1399_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")];
+            tensor<int32, [1]> var_1407_axes_0 = const()[name = string("op_1407_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387437952)))];
+            tensor<fp16, [1280]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387440576)))];
+            tensor<fp16, [1, ?, 1280]> var_1407_cast_fp16 = layer_norm(axes = var_1407_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1407_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1416_to_fp16 = const()[name = string("op_1416_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387443200)))];
+            tensor<fp16, [5120]> var_1417_to_fp16 = const()[name = string("op_1417_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400550464)))];
+            tensor<fp16, [1, ?, 5120]> linear_46_cast_fp16 = linear(bias = var_1417_to_fp16, weight = var_1416_to_fp16, x = var_1407_cast_fp16)[name = string("linear_46_cast_fp16")];
+            string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1422_to_fp16 = const()[name = string("op_1422_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400560768)))];
+            tensor<fp16, [1280]> var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413668032)))];
+            tensor<fp16, [1, ?, 1280]> linear_47_cast_fp16 = linear(bias = var_1423_to_fp16, weight = var_1422_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")];
+            tensor<int32, [4]> k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor<int32, [4]>([7, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_74)[name = string("k_cache_25_cast_fp16")];
+            tensor<int32, [4]> v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor<int32, [4]>([7, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_75)[name = string("v_cache_25_cast_fp16")];
+            tensor<int32, [4]> k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor<int32, [4]>([7, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")];
+            tensor<int32, [4]> v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor<int32, [4]>([7, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")];
+            int32 var_1446 = const()[name = string("op_1446"), val = int32(-1)];
+            tensor<int32, [1]> var_1464_axes_0 = const()[name = string("op_1464_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413670656)))];
+            tensor<fp16, [1280]> blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413673280)))];
+            fp16 var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1464_cast_fp16 = layer_norm(axes = var_1464_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1464_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1475_to_fp16 = const()[name = string("op_1475_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413675904)))];
+            tensor<fp16, [1280]> var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416952768)))];
+            tensor<fp16, [1, ?, 1280]> linear_48_cast_fp16 = linear(bias = var_1476_to_fp16, weight = var_1475_to_fp16, x = var_1464_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1479_to_fp16 = const()[name = string("op_1479_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416955392)))];
+            tensor<fp16, [1, ?, 1280]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1479_to_fp16, x = var_1464_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420232256)))];
+            tensor<fp16, [1280]> var_1484_to_fp16 = const()[name = string("op_1484_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423509120)))];
+            tensor<fp16, [1, ?, 1280]> linear_50_cast_fp16 = linear(bias = var_1484_to_fp16, weight = var_1483_to_fp16, x = var_1464_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [3]> var_1486_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1486_shape_cast_fp16")];
+            int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)];
+            int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)];
+            bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)];
+            string var_1486_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1486_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1486_shape_cast_fp16_to_uint16 = cast(dtype = var_1486_shape_cast_fp16_to_uint16_dtype_0, x = var_1486_shape_cast_fp16)[name = string("cast_378")];
+            uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1486_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")];
+            string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_377")];
+            int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")];
+            tensor<int32, [1]> expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")];
+            tensor<int32, [1]> concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor<int32, [1]>([6])];
+            int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)];
+            bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")];
+            tensor<int32, [1]> concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)];
+            bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_74)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_76_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_76 = read_state(input = k_cache1)[name = string("coreml_update_state_76")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_75)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_77_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_77 = read_state(input = v_cache1)[name = string("coreml_update_state_77")];
+            int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)];
+            int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(1280)];
+            int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)];
+            bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")];
+            tensor<int32, [3]> var_1502_begin_0 = const()[name = string("op_1502_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1502_end_mask_0 = const()[name = string("op_1502_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = concat_142, end_mask = var_1502_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1502_cast_fp16")];
+            tensor<int32, [3]> var_1505_begin_0 = const()[name = string("op_1505_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1505_end_mask_0 = const()[name = string("op_1505_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1505_cast_fp16 = slice_by_index(begin = var_1505_begin_0, end = concat_142, end_mask = var_1505_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1505_cast_fp16")];
+            tensor<int32, [4]> concat_144x = const()[name = string("concat_144x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1515_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1515_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_51_cast_fp16 = mul(x = var_1515_cast_fp16, y = const_184_to_fp16)[name = string("q_51_cast_fp16")];
+            tensor<int32, [4]> concat_145x = const()[name = string("concat_145x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1522_cast_fp16 = reshape(shape = concat_145x, x = var_1502_cast_fp16)[name = string("op_1522_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_65_cast_fp16 = mul(x = var_1522_cast_fp16, y = const_185_to_fp16)[name = string("k_65_cast_fp16")];
+            tensor<int32, [4]> concat_146x = const()[name = string("concat_146x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1529_cast_fp16 = reshape(shape = concat_146x, x = var_1505_cast_fp16)[name = string("op_1529_cast_fp16")];
+            tensor<int32, [4]> var_1530 = const()[name = string("op_1530"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)];
+            bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_281_perm_0 = const()[name = string("transpose_281_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_282_perm_0 = const()[name = string("transpose_282_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_282 = transpose(perm = transpose_282_perm_0, x = k_65_cast_fp16)[name = string("transpose_590")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_281 = transpose(perm = transpose_281_perm_0, x = q_51_cast_fp16)[name = string("transpose_591")];
+            tensor<fp16, [1, 20, ?, ?]> qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_281, y = transpose_282)[name = string("qk_37_cast_fp16")];
+            int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)];
+            int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)];
+            bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")];
+            tensor<int32, [2]> var_1533_begin_0 = const()[name = string("op_1533_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1533_end_mask_0 = const()[name = string("op_1533_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1533_cast_fp16 = slice_by_index(begin = var_1533_begin_0, end = concat_147, end_mask = var_1533_end_mask_0, x = mask_to_fp16)[name = string("op_1533_cast_fp16")];
+            int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)];
+            int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)];
+            bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")];
+            tensor<int32, [2]> var_1534_begin_0 = const()[name = string("op_1534_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1534_end_mask_0 = const()[name = string("op_1534_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1534_cast_fp16 = slice_by_index(begin = var_1534_begin_0, end = concat_148, end_mask = var_1534_end_mask_0, x = var_1533_cast_fp16)[name = string("op_1534_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1534_cast_fp16)[name = string("qk_39_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1537_cast_fp16 = softmax(axis = var_1446, x = qk_39_cast_fp16)[name = string("op_1537_cast_fp16")];
+            bool var_1539_transpose_x_0 = const()[name = string("op_1539_transpose_x_0"), val = bool(false)];
+            bool var_1539_transpose_y_0 = const()[name = string("op_1539_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_65_cast_fp16 = transpose(perm = var_1530, x = var_1529_cast_fp16)[name = string("transpose_592")];
+            tensor<fp16, [1, 20, ?, 64]> var_1539_cast_fp16 = matmul(transpose_x = var_1539_transpose_x_0, transpose_y = var_1539_transpose_y_0, x = var_1537_cast_fp16, y = v_65_cast_fp16)[name = string("op_1539_cast_fp16")];
+            tensor<int32, [4]> var_1540 = const()[name = string("op_1540"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_149x = const()[name = string("concat_149x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1541_cast_fp16 = transpose(perm = var_1540, x = var_1539_cast_fp16)[name = string("transpose_589")];
+            tensor<fp16, [1, ?, 1280]> x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1541_cast_fp16)[name = string("x_115_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423511744)))];
+            tensor<fp16, [1280]> var_1546_to_fp16 = const()[name = string("op_1546_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426788608)))];
+            tensor<fp16, [1, ?, 1280]> linear_51_cast_fp16 = linear(bias = var_1546_to_fp16, weight = var_1545_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")];
+            tensor<int32, [1]> var_1553_axes_0 = const()[name = string("op_1553_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426791232)))];
+            tensor<fp16, [1280]> blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426793856)))];
+            tensor<fp16, [1, ?, 1280]> var_1553_cast_fp16 = layer_norm(axes = var_1553_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1553_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1562_to_fp16 = const()[name = string("op_1562_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426796480)))];
+            tensor<fp16, [1280]> var_1563_to_fp16 = const()[name = string("op_1563_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430073344)))];
+            tensor<fp16, [1, ?, 1280]> linear_52_cast_fp16 = linear(bias = var_1563_to_fp16, weight = var_1562_to_fp16, x = var_1553_cast_fp16)[name = string("linear_52_cast_fp16")];
+            tensor<int32, [3]> concat_150 = const()[name = string("concat_150"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_151 = const()[name = string("concat_151"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_152 = const()[name = string("concat_152"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_153 = const()[name = string("concat_153"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_154x = const()[name = string("concat_154x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1583_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1583_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_55_cast_fp16 = mul(x = var_1583_cast_fp16, y = const_186_to_fp16)[name = string("q_55_cast_fp16")];
+            tensor<int32, [4]> var_1589 = const()[name = string("op_1589"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1590_cast_fp16 = reshape(shape = var_1589, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1590_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_69_cast_fp16 = mul(x = var_1590_cast_fp16, y = const_187_to_fp16)[name = string("k_69_cast_fp16")];
+            tensor<int32, [4]> var_1596 = const()[name = string("op_1596"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1597_cast_fp16 = reshape(shape = var_1596, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1597_cast_fp16")];
+            tensor<int32, [4]> var_1598 = const()[name = string("op_1598"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)];
+            bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_283_perm_0 = const()[name = string("transpose_283_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_284_perm_0 = const()[name = string("transpose_284_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_284 = transpose(perm = transpose_284_perm_0, x = k_69_cast_fp16)[name = string("transpose_586")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_283 = transpose(perm = transpose_283_perm_0, x = q_55_cast_fp16)[name = string("transpose_587")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_283, y = transpose_284)[name = string("qk_41_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_1602_cast_fp16 = softmax(axis = var_1446, x = qk_41_cast_fp16)[name = string("op_1602_cast_fp16")];
+            bool var_1604_transpose_x_0 = const()[name = string("op_1604_transpose_x_0"), val = bool(false)];
+            bool var_1604_transpose_y_0 = const()[name = string("op_1604_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_69_cast_fp16 = transpose(perm = var_1598, x = var_1597_cast_fp16)[name = string("transpose_588")];
+            tensor<fp16, [1, 20, ?, 64]> var_1604_cast_fp16 = matmul(transpose_x = var_1604_transpose_x_0, transpose_y = var_1604_transpose_y_0, x = var_1602_cast_fp16, y = v_69_cast_fp16)[name = string("op_1604_cast_fp16")];
+            tensor<int32, [4]> var_1605 = const()[name = string("op_1605"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_155x = const()[name = string("concat_155x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1606_cast_fp16 = transpose(perm = var_1605, x = var_1604_cast_fp16)[name = string("transpose_585")];
+            tensor<fp16, [1, ?, 1280]> x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1606_cast_fp16)[name = string("x_121_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430075968)))];
+            tensor<fp16, [1280]> var_1611_to_fp16 = const()[name = string("op_1611_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433352832)))];
+            tensor<fp16, [1, ?, 1280]> linear_53_cast_fp16 = linear(bias = var_1611_to_fp16, weight = var_1610_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")];
+            tensor<int32, [1]> var_1618_axes_0 = const()[name = string("op_1618_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433355456)))];
+            tensor<fp16, [1280]> blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433358080)))];
+            tensor<fp16, [1, ?, 1280]> var_1618_cast_fp16 = layer_norm(axes = var_1618_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1618_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1627_to_fp16 = const()[name = string("op_1627_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433360704)))];
+            tensor<fp16, [5120]> var_1628_to_fp16 = const()[name = string("op_1628_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446467968)))];
+            tensor<fp16, [1, ?, 5120]> linear_54_cast_fp16 = linear(bias = var_1628_to_fp16, weight = var_1627_to_fp16, x = var_1618_cast_fp16)[name = string("linear_54_cast_fp16")];
+            string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446478272)))];
+            tensor<fp16, [1280]> var_1634_to_fp16 = const()[name = string("op_1634_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459585536)))];
+            tensor<fp16, [1, ?, 1280]> linear_55_cast_fp16 = linear(bias = var_1634_to_fp16, weight = var_1633_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")];
+            tensor<int32, [4]> k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor<int32, [4]>([8, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_76)[name = string("k_cache_29_cast_fp16")];
+            tensor<int32, [4]> v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor<int32, [4]>([8, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_77)[name = string("v_cache_29_cast_fp16")];
+            tensor<int32, [4]> k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor<int32, [4]>([8, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")];
+            tensor<int32, [4]> v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor<int32, [4]>([8, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")];
+            int32 var_1657 = const()[name = string("op_1657"), val = int32(-1)];
+            tensor<int32, [1]> var_1675_axes_0 = const()[name = string("op_1675_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459588160)))];
+            tensor<fp16, [1280]> blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459590784)))];
+            fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1675_cast_fp16 = layer_norm(axes = var_1675_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1675_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459593408)))];
+            tensor<fp16, [1280]> var_1687_to_fp16 = const()[name = string("op_1687_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462870272)))];
+            tensor<fp16, [1, ?, 1280]> linear_56_cast_fp16 = linear(bias = var_1687_to_fp16, weight = var_1686_to_fp16, x = var_1675_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1690_to_fp16 = const()[name = string("op_1690_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462872896)))];
+            tensor<fp16, [1, ?, 1280]> linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1690_to_fp16, x = var_1675_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466149760)))];
+            tensor<fp16, [1280]> var_1695_to_fp16 = const()[name = string("op_1695_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469426624)))];
+            tensor<fp16, [1, ?, 1280]> linear_58_cast_fp16 = linear(bias = var_1695_to_fp16, weight = var_1694_to_fp16, x = var_1675_cast_fp16)[name = string("linear_58_cast_fp16")];
+            tensor<int32, [3]> var_1697_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1697_shape_cast_fp16")];
+            int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)];
+            int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)];
+            bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)];
+            string var_1697_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1697_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1697_shape_cast_fp16_to_uint16 = cast(dtype = var_1697_shape_cast_fp16_to_uint16_dtype_0, x = var_1697_shape_cast_fp16)[name = string("cast_376")];
+            uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1697_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")];
+            string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_375")];
+            int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")];
+            tensor<int32, [1]> expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")];
+            tensor<int32, [1]> concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor<int32, [1]>([7])];
+            int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)];
+            bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")];
+            tensor<int32, [1]> concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)];
+            bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_76)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_78_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_78 = read_state(input = k_cache1)[name = string("coreml_update_state_78")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_77)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_79_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_79 = read_state(input = v_cache1)[name = string("coreml_update_state_79")];
+            int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)];
+            int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(1280)];
+            int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)];
+            bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")];
+            tensor<int32, [3]> var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = concat_164, end_mask = var_1713_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1713_cast_fp16")];
+            tensor<int32, [3]> var_1716_begin_0 = const()[name = string("op_1716_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1716_end_mask_0 = const()[name = string("op_1716_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1716_cast_fp16 = slice_by_index(begin = var_1716_begin_0, end = concat_164, end_mask = var_1716_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1716_cast_fp16")];
+            tensor<int32, [4]> concat_166x = const()[name = string("concat_166x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1726_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1726_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_59_cast_fp16 = mul(x = var_1726_cast_fp16, y = const_188_to_fp16)[name = string("q_59_cast_fp16")];
+            tensor<int32, [4]> concat_167x = const()[name = string("concat_167x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1733_cast_fp16 = reshape(shape = concat_167x, x = var_1713_cast_fp16)[name = string("op_1733_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_75_cast_fp16 = mul(x = var_1733_cast_fp16, y = const_189_to_fp16)[name = string("k_75_cast_fp16")];
+            tensor<int32, [4]> concat_168x = const()[name = string("concat_168x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1740_cast_fp16 = reshape(shape = concat_168x, x = var_1716_cast_fp16)[name = string("op_1740_cast_fp16")];
+            tensor<int32, [4]> var_1741 = const()[name = string("op_1741"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)];
+            bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_285_perm_0 = const()[name = string("transpose_285_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_286_perm_0 = const()[name = string("transpose_286_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_286 = transpose(perm = transpose_286_perm_0, x = k_75_cast_fp16)[name = string("transpose_582")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_285 = transpose(perm = transpose_285_perm_0, x = q_59_cast_fp16)[name = string("transpose_583")];
+            tensor<fp16, [1, 20, ?, ?]> qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_285, y = transpose_286)[name = string("qk_43_cast_fp16")];
+            int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)];
+            int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)];
+            bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")];
+            tensor<int32, [2]> var_1744_begin_0 = const()[name = string("op_1744_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1744_end_mask_0 = const()[name = string("op_1744_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1744_cast_fp16 = slice_by_index(begin = var_1744_begin_0, end = concat_169, end_mask = var_1744_end_mask_0, x = mask_to_fp16)[name = string("op_1744_cast_fp16")];
+            int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)];
+            int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)];
+            bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")];
+            tensor<int32, [2]> var_1745_begin_0 = const()[name = string("op_1745_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1745_end_mask_0 = const()[name = string("op_1745_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1745_cast_fp16 = slice_by_index(begin = var_1745_begin_0, end = concat_170, end_mask = var_1745_end_mask_0, x = var_1744_cast_fp16)[name = string("op_1745_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1745_cast_fp16)[name = string("qk_45_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1748_cast_fp16 = softmax(axis = var_1657, x = qk_45_cast_fp16)[name = string("op_1748_cast_fp16")];
+            bool var_1750_transpose_x_0 = const()[name = string("op_1750_transpose_x_0"), val = bool(false)];
+            bool var_1750_transpose_y_0 = const()[name = string("op_1750_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_75_cast_fp16 = transpose(perm = var_1741, x = var_1740_cast_fp16)[name = string("transpose_584")];
+            tensor<fp16, [1, 20, ?, 64]> var_1750_cast_fp16 = matmul(transpose_x = var_1750_transpose_x_0, transpose_y = var_1750_transpose_y_0, x = var_1748_cast_fp16, y = v_75_cast_fp16)[name = string("op_1750_cast_fp16")];
+            tensor<int32, [4]> var_1751 = const()[name = string("op_1751"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_171x = const()[name = string("concat_171x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1752_cast_fp16 = transpose(perm = var_1751, x = var_1750_cast_fp16)[name = string("transpose_581")];
+            tensor<fp16, [1, ?, 1280]> x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1752_cast_fp16)[name = string("x_133_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1756_to_fp16 = const()[name = string("op_1756_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469429248)))];
+            tensor<fp16, [1280]> var_1757_to_fp16 = const()[name = string("op_1757_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472706112)))];
+            tensor<fp16, [1, ?, 1280]> linear_59_cast_fp16 = linear(bias = var_1757_to_fp16, weight = var_1756_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")];
+            tensor<int32, [1]> var_1764_axes_0 = const()[name = string("op_1764_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472708736)))];
+            tensor<fp16, [1280]> blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472711360)))];
+            tensor<fp16, [1, ?, 1280]> var_1764_cast_fp16 = layer_norm(axes = var_1764_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1764_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1773_to_fp16 = const()[name = string("op_1773_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472713984)))];
+            tensor<fp16, [1280]> var_1774_to_fp16 = const()[name = string("op_1774_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475990848)))];
+            tensor<fp16, [1, ?, 1280]> linear_60_cast_fp16 = linear(bias = var_1774_to_fp16, weight = var_1773_to_fp16, x = var_1764_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<int32, [3]> concat_172 = const()[name = string("concat_172"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_173 = const()[name = string("concat_173"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_174 = const()[name = string("concat_174"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_175 = const()[name = string("concat_175"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_176x = const()[name = string("concat_176x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1794_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1794_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_63_cast_fp16 = mul(x = var_1794_cast_fp16, y = const_190_to_fp16)[name = string("q_63_cast_fp16")];
+            tensor<int32, [4]> var_1800 = const()[name = string("op_1800"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1801_cast_fp16 = reshape(shape = var_1800, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1801_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_79_cast_fp16 = mul(x = var_1801_cast_fp16, y = const_191_to_fp16)[name = string("k_79_cast_fp16")];
+            tensor<int32, [4]> var_1807 = const()[name = string("op_1807"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1808_cast_fp16 = reshape(shape = var_1807, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1808_cast_fp16")];
+            tensor<int32, [4]> var_1809 = const()[name = string("op_1809"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)];
+            bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_287_perm_0 = const()[name = string("transpose_287_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_288_perm_0 = const()[name = string("transpose_288_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_288 = transpose(perm = transpose_288_perm_0, x = k_79_cast_fp16)[name = string("transpose_578")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_287 = transpose(perm = transpose_287_perm_0, x = q_63_cast_fp16)[name = string("transpose_579")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_287, y = transpose_288)[name = string("qk_47_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_1813_cast_fp16 = softmax(axis = var_1657, x = qk_47_cast_fp16)[name = string("op_1813_cast_fp16")];
+            bool var_1815_transpose_x_0 = const()[name = string("op_1815_transpose_x_0"), val = bool(false)];
+            bool var_1815_transpose_y_0 = const()[name = string("op_1815_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_79_cast_fp16 = transpose(perm = var_1809, x = var_1808_cast_fp16)[name = string("transpose_580")];
+            tensor<fp16, [1, 20, ?, 64]> var_1815_cast_fp16 = matmul(transpose_x = var_1815_transpose_x_0, transpose_y = var_1815_transpose_y_0, x = var_1813_cast_fp16, y = v_79_cast_fp16)[name = string("op_1815_cast_fp16")];
+            tensor<int32, [4]> var_1816 = const()[name = string("op_1816"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_177x = const()[name = string("concat_177x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1817_cast_fp16 = transpose(perm = var_1816, x = var_1815_cast_fp16)[name = string("transpose_577")];
+            tensor<fp16, [1, ?, 1280]> x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1817_cast_fp16)[name = string("x_139_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1821_to_fp16 = const()[name = string("op_1821_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475993472)))];
+            tensor<fp16, [1280]> var_1822_to_fp16 = const()[name = string("op_1822_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479270336)))];
+            tensor<fp16, [1, ?, 1280]> linear_61_cast_fp16 = linear(bias = var_1822_to_fp16, weight = var_1821_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")];
+            tensor<int32, [1]> var_1829_axes_0 = const()[name = string("op_1829_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479272960)))];
+            tensor<fp16, [1280]> blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479275584)))];
+            tensor<fp16, [1, ?, 1280]> var_1829_cast_fp16 = layer_norm(axes = var_1829_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1829_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1838_to_fp16 = const()[name = string("op_1838_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479278208)))];
+            tensor<fp16, [5120]> var_1839_to_fp16 = const()[name = string("op_1839_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492385472)))];
+            tensor<fp16, [1, ?, 5120]> linear_62_cast_fp16 = linear(bias = var_1839_to_fp16, weight = var_1838_to_fp16, x = var_1829_cast_fp16)[name = string("linear_62_cast_fp16")];
+            string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1844_to_fp16 = const()[name = string("op_1844_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492395776)))];
+            tensor<fp16, [1280]> var_1845_to_fp16 = const()[name = string("op_1845_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505503040)))];
+            tensor<fp16, [1, ?, 1280]> linear_63_cast_fp16 = linear(bias = var_1845_to_fp16, weight = var_1844_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")];
+            tensor<int32, [4]> k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor<int32, [4]>([9, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_78)[name = string("k_cache_33_cast_fp16")];
+            tensor<int32, [4]> v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor<int32, [4]>([9, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_79)[name = string("v_cache_33_cast_fp16")];
+            tensor<int32, [4]> k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor<int32, [4]>([9, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")];
+            tensor<int32, [4]> v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor<int32, [4]>([9, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")];
+            int32 var_1868 = const()[name = string("op_1868"), val = int32(-1)];
+            tensor<int32, [1]> var_1886_axes_0 = const()[name = string("op_1886_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505505664)))];
+            tensor<fp16, [1280]> blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505508288)))];
+            fp16 var_1874_to_fp16 = const()[name = string("op_1874_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1886_cast_fp16 = layer_norm(axes = var_1886_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1886_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1897_to_fp16 = const()[name = string("op_1897_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505510912)))];
+            tensor<fp16, [1280]> var_1898_to_fp16 = const()[name = string("op_1898_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508787776)))];
+            tensor<fp16, [1, ?, 1280]> linear_64_cast_fp16 = linear(bias = var_1898_to_fp16, weight = var_1897_to_fp16, x = var_1886_cast_fp16)[name = string("linear_64_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1901_to_fp16 = const()[name = string("op_1901_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508790400)))];
+            tensor<fp16, [1, ?, 1280]> linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1901_to_fp16, x = var_1886_cast_fp16)[name = string("linear_65_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512067264)))];
+            tensor<fp16, [1280]> var_1906_to_fp16 = const()[name = string("op_1906_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515344128)))];
+            tensor<fp16, [1, ?, 1280]> linear_66_cast_fp16 = linear(bias = var_1906_to_fp16, weight = var_1905_to_fp16, x = var_1886_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<int32, [3]> var_1908_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1908_shape_cast_fp16")];
+            int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)];
+            int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)];
+            bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)];
+            string var_1908_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1908_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1908_shape_cast_fp16_to_uint16 = cast(dtype = var_1908_shape_cast_fp16_to_uint16_dtype_0, x = var_1908_shape_cast_fp16)[name = string("cast_374")];
+            uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1908_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")];
+            string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_373")];
+            int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")];
+            tensor<int32, [1]> expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")];
+            tensor<int32, [1]> concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor<int32, [1]>([8])];
+            int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)];
+            bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")];
+            tensor<int32, [1]> concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)];
+            bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_78)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_80_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_80 = read_state(input = k_cache1)[name = string("coreml_update_state_80")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_79)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_81_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_81 = read_state(input = v_cache1)[name = string("coreml_update_state_81")];
+            int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)];
+            int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(1280)];
+            int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)];
+            bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")];
+            tensor<int32, [3]> var_1924_begin_0 = const()[name = string("op_1924_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1924_end_mask_0 = const()[name = string("op_1924_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = concat_186, end_mask = var_1924_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1924_cast_fp16")];
+            tensor<int32, [3]> var_1927_begin_0 = const()[name = string("op_1927_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1927_end_mask_0 = const()[name = string("op_1927_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1927_cast_fp16 = slice_by_index(begin = var_1927_begin_0, end = concat_186, end_mask = var_1927_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1927_cast_fp16")];
+            tensor<int32, [4]> concat_188x = const()[name = string("concat_188x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1937_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1937_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_67_cast_fp16 = mul(x = var_1937_cast_fp16, y = const_192_to_fp16)[name = string("q_67_cast_fp16")];
+            tensor<int32, [4]> concat_189x = const()[name = string("concat_189x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1944_cast_fp16 = reshape(shape = concat_189x, x = var_1924_cast_fp16)[name = string("op_1944_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_85_cast_fp16 = mul(x = var_1944_cast_fp16, y = const_193_to_fp16)[name = string("k_85_cast_fp16")];
+            tensor<int32, [4]> concat_190x = const()[name = string("concat_190x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1951_cast_fp16 = reshape(shape = concat_190x, x = var_1927_cast_fp16)[name = string("op_1951_cast_fp16")];
+            tensor<int32, [4]> var_1952 = const()[name = string("op_1952"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)];
+            bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_289_perm_0 = const()[name = string("transpose_289_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_290_perm_0 = const()[name = string("transpose_290_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_290 = transpose(perm = transpose_290_perm_0, x = k_85_cast_fp16)[name = string("transpose_574")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_289 = transpose(perm = transpose_289_perm_0, x = q_67_cast_fp16)[name = string("transpose_575")];
+            tensor<fp16, [1, 20, ?, ?]> qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_289, y = transpose_290)[name = string("qk_49_cast_fp16")];
+            int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)];
+            int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)];
+            bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")];
+            tensor<int32, [2]> var_1955_begin_0 = const()[name = string("op_1955_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1955_end_mask_0 = const()[name = string("op_1955_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1955_cast_fp16 = slice_by_index(begin = var_1955_begin_0, end = concat_191, end_mask = var_1955_end_mask_0, x = mask_to_fp16)[name = string("op_1955_cast_fp16")];
+            int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)];
+            int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)];
+            bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")];
+            tensor<int32, [2]> var_1956_begin_0 = const()[name = string("op_1956_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1956_end_mask_0 = const()[name = string("op_1956_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1956_cast_fp16 = slice_by_index(begin = var_1956_begin_0, end = concat_192, end_mask = var_1956_end_mask_0, x = var_1955_cast_fp16)[name = string("op_1956_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1956_cast_fp16)[name = string("qk_51_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1959_cast_fp16 = softmax(axis = var_1868, x = qk_51_cast_fp16)[name = string("op_1959_cast_fp16")];
+            bool var_1961_transpose_x_0 = const()[name = string("op_1961_transpose_x_0"), val = bool(false)];
+            bool var_1961_transpose_y_0 = const()[name = string("op_1961_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_85_cast_fp16 = transpose(perm = var_1952, x = var_1951_cast_fp16)[name = string("transpose_576")];
+            tensor<fp16, [1, 20, ?, 64]> var_1961_cast_fp16 = matmul(transpose_x = var_1961_transpose_x_0, transpose_y = var_1961_transpose_y_0, x = var_1959_cast_fp16, y = v_85_cast_fp16)[name = string("op_1961_cast_fp16")];
+            tensor<int32, [4]> var_1962 = const()[name = string("op_1962"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_193x = const()[name = string("concat_193x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1963_cast_fp16 = transpose(perm = var_1962, x = var_1961_cast_fp16)[name = string("transpose_573")];
+            tensor<fp16, [1, ?, 1280]> x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1963_cast_fp16)[name = string("x_151_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1967_to_fp16 = const()[name = string("op_1967_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515346752)))];
+            tensor<fp16, [1280]> var_1968_to_fp16 = const()[name = string("op_1968_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518623616)))];
+            tensor<fp16, [1, ?, 1280]> linear_67_cast_fp16 = linear(bias = var_1968_to_fp16, weight = var_1967_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")];
+            tensor<int32, [1]> var_1975_axes_0 = const()[name = string("op_1975_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518626240)))];
+            tensor<fp16, [1280]> blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518628864)))];
+            tensor<fp16, [1, ?, 1280]> var_1975_cast_fp16 = layer_norm(axes = var_1975_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1975_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1984_to_fp16 = const()[name = string("op_1984_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518631488)))];
+            tensor<fp16, [1280]> var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521908352)))];
+            tensor<fp16, [1, ?, 1280]> linear_68_cast_fp16 = linear(bias = var_1985_to_fp16, weight = var_1984_to_fp16, x = var_1975_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [3]> concat_194 = const()[name = string("concat_194"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_195 = const()[name = string("concat_195"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_196 = const()[name = string("concat_196"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_197 = const()[name = string("concat_197"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_198x = const()[name = string("concat_198x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2005_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_2005_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_71_cast_fp16 = mul(x = var_2005_cast_fp16, y = const_194_to_fp16)[name = string("q_71_cast_fp16")];
+            tensor<int32, [4]> var_2011 = const()[name = string("op_2011"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2012_cast_fp16 = reshape(shape = var_2011, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2012_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_89_cast_fp16 = mul(x = var_2012_cast_fp16, y = const_195_to_fp16)[name = string("k_89_cast_fp16")];
+            tensor<int32, [4]> var_2018 = const()[name = string("op_2018"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2019_cast_fp16 = reshape(shape = var_2018, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2019_cast_fp16")];
+            tensor<int32, [4]> var_2020 = const()[name = string("op_2020"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)];
+            bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_291_perm_0 = const()[name = string("transpose_291_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_292_perm_0 = const()[name = string("transpose_292_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_292 = transpose(perm = transpose_292_perm_0, x = k_89_cast_fp16)[name = string("transpose_570")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_291 = transpose(perm = transpose_291_perm_0, x = q_71_cast_fp16)[name = string("transpose_571")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_291, y = transpose_292)[name = string("qk_53_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2024_cast_fp16 = softmax(axis = var_1868, x = qk_53_cast_fp16)[name = string("op_2024_cast_fp16")];
+            bool var_2026_transpose_x_0 = const()[name = string("op_2026_transpose_x_0"), val = bool(false)];
+            bool var_2026_transpose_y_0 = const()[name = string("op_2026_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_89_cast_fp16 = transpose(perm = var_2020, x = var_2019_cast_fp16)[name = string("transpose_572")];
+            tensor<fp16, [1, 20, ?, 64]> var_2026_cast_fp16 = matmul(transpose_x = var_2026_transpose_x_0, transpose_y = var_2026_transpose_y_0, x = var_2024_cast_fp16, y = v_89_cast_fp16)[name = string("op_2026_cast_fp16")];
+            tensor<int32, [4]> var_2027 = const()[name = string("op_2027"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_199x = const()[name = string("concat_199x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2028_cast_fp16 = transpose(perm = var_2027, x = var_2026_cast_fp16)[name = string("transpose_569")];
+            tensor<fp16, [1, ?, 1280]> x_157_cast_fp16 = reshape(shape = concat_199x, x = var_2028_cast_fp16)[name = string("x_157_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521910976)))];
+            tensor<fp16, [1280]> var_2033_to_fp16 = const()[name = string("op_2033_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525187840)))];
+            tensor<fp16, [1, ?, 1280]> linear_69_cast_fp16 = linear(bias = var_2033_to_fp16, weight = var_2032_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")];
+            tensor<int32, [1]> var_2040_axes_0 = const()[name = string("op_2040_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525190464)))];
+            tensor<fp16, [1280]> blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525193088)))];
+            tensor<fp16, [1, ?, 1280]> var_2040_cast_fp16 = layer_norm(axes = var_2040_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_2040_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2049_to_fp16 = const()[name = string("op_2049_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525195712)))];
+            tensor<fp16, [5120]> var_2050_to_fp16 = const()[name = string("op_2050_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538302976)))];
+            tensor<fp16, [1, ?, 5120]> linear_70_cast_fp16 = linear(bias = var_2050_to_fp16, weight = var_2049_to_fp16, x = var_2040_cast_fp16)[name = string("linear_70_cast_fp16")];
+            string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2055_to_fp16 = const()[name = string("op_2055_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538313280)))];
+            tensor<fp16, [1280]> var_2056_to_fp16 = const()[name = string("op_2056_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551420544)))];
+            tensor<fp16, [1, ?, 1280]> linear_71_cast_fp16 = linear(bias = var_2056_to_fp16, weight = var_2055_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")];
+            tensor<int32, [4]> k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor<int32, [4]>([10, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_80)[name = string("k_cache_37_cast_fp16")];
+            tensor<int32, [4]> v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor<int32, [4]>([10, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_81)[name = string("v_cache_37_cast_fp16")];
+            tensor<int32, [4]> k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor<int32, [4]>([10, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")];
+            tensor<int32, [4]> v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor<int32, [4]>([10, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")];
+            int32 var_2079 = const()[name = string("op_2079"), val = int32(-1)];
+            tensor<int32, [1]> var_2097_axes_0 = const()[name = string("op_2097_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551423168)))];
+            tensor<fp16, [1280]> blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551425792)))];
+            fp16 var_2085_to_fp16 = const()[name = string("op_2085_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2097_cast_fp16 = layer_norm(axes = var_2097_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2097_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2108_to_fp16 = const()[name = string("op_2108_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551428416)))];
+            tensor<fp16, [1280]> var_2109_to_fp16 = const()[name = string("op_2109_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554705280)))];
+            tensor<fp16, [1, ?, 1280]> linear_72_cast_fp16 = linear(bias = var_2109_to_fp16, weight = var_2108_to_fp16, x = var_2097_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2112_to_fp16 = const()[name = string("op_2112_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554707904)))];
+            tensor<fp16, [1, ?, 1280]> linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2112_to_fp16, x = var_2097_cast_fp16)[name = string("linear_73_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2116_to_fp16 = const()[name = string("op_2116_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557984768)))];
+            tensor<fp16, [1280]> var_2117_to_fp16 = const()[name = string("op_2117_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561261632)))];
+            tensor<fp16, [1, ?, 1280]> linear_74_cast_fp16 = linear(bias = var_2117_to_fp16, weight = var_2116_to_fp16, x = var_2097_cast_fp16)[name = string("linear_74_cast_fp16")];
+            tensor<int32, [3]> var_2119_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2119_shape_cast_fp16")];
+            int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)];
+            int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)];
+            bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)];
+            string var_2119_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2119_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2119_shape_cast_fp16_to_uint16 = cast(dtype = var_2119_shape_cast_fp16_to_uint16_dtype_0, x = var_2119_shape_cast_fp16)[name = string("cast_372")];
+            uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2119_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")];
+            string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_371")];
+            int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")];
+            tensor<int32, [1]> expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")];
+            tensor<int32, [1]> concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor<int32, [1]>([9])];
+            int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)];
+            bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")];
+            tensor<int32, [1]> concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)];
+            bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_80)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_82_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_82 = read_state(input = k_cache1)[name = string("coreml_update_state_82")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_81)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_83_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_83 = read_state(input = v_cache1)[name = string("coreml_update_state_83")];
+            int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)];
+            int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(1280)];
+            int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)];
+            bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")];
+            tensor<int32, [3]> var_2135_begin_0 = const()[name = string("op_2135_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2135_end_mask_0 = const()[name = string("op_2135_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2135_cast_fp16 = slice_by_index(begin = var_2135_begin_0, end = concat_208, end_mask = var_2135_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2135_cast_fp16")];
+            tensor<int32, [3]> var_2138_begin_0 = const()[name = string("op_2138_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2138_end_mask_0 = const()[name = string("op_2138_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = concat_208, end_mask = var_2138_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2138_cast_fp16")];
+            tensor<int32, [4]> concat_210x = const()[name = string("concat_210x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2148_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2148_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_75_cast_fp16 = mul(x = var_2148_cast_fp16, y = const_196_to_fp16)[name = string("q_75_cast_fp16")];
+            tensor<int32, [4]> concat_211x = const()[name = string("concat_211x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2155_cast_fp16 = reshape(shape = concat_211x, x = var_2135_cast_fp16)[name = string("op_2155_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_95_cast_fp16 = mul(x = var_2155_cast_fp16, y = const_197_to_fp16)[name = string("k_95_cast_fp16")];
+            tensor<int32, [4]> concat_212x = const()[name = string("concat_212x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2162_cast_fp16 = reshape(shape = concat_212x, x = var_2138_cast_fp16)[name = string("op_2162_cast_fp16")];
+            tensor<int32, [4]> var_2163 = const()[name = string("op_2163"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)];
+            bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_293_perm_0 = const()[name = string("transpose_293_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_294_perm_0 = const()[name = string("transpose_294_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_294 = transpose(perm = transpose_294_perm_0, x = k_95_cast_fp16)[name = string("transpose_566")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_293 = transpose(perm = transpose_293_perm_0, x = q_75_cast_fp16)[name = string("transpose_567")];
+            tensor<fp16, [1, 20, ?, ?]> qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_293, y = transpose_294)[name = string("qk_55_cast_fp16")];
+            int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)];
+            int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)];
+            bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")];
+            tensor<int32, [2]> var_2166_begin_0 = const()[name = string("op_2166_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2166_end_mask_0 = const()[name = string("op_2166_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2166_cast_fp16 = slice_by_index(begin = var_2166_begin_0, end = concat_213, end_mask = var_2166_end_mask_0, x = mask_to_fp16)[name = string("op_2166_cast_fp16")];
+            int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)];
+            int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)];
+            bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")];
+            tensor<int32, [2]> var_2167_begin_0 = const()[name = string("op_2167_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2167_end_mask_0 = const()[name = string("op_2167_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2167_cast_fp16 = slice_by_index(begin = var_2167_begin_0, end = concat_214, end_mask = var_2167_end_mask_0, x = var_2166_cast_fp16)[name = string("op_2167_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2167_cast_fp16)[name = string("qk_57_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_2170_cast_fp16 = softmax(axis = var_2079, x = qk_57_cast_fp16)[name = string("op_2170_cast_fp16")];
+            bool var_2172_transpose_x_0 = const()[name = string("op_2172_transpose_x_0"), val = bool(false)];
+            bool var_2172_transpose_y_0 = const()[name = string("op_2172_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_95_cast_fp16 = transpose(perm = var_2163, x = var_2162_cast_fp16)[name = string("transpose_568")];
+            tensor<fp16, [1, 20, ?, 64]> var_2172_cast_fp16 = matmul(transpose_x = var_2172_transpose_x_0, transpose_y = var_2172_transpose_y_0, x = var_2170_cast_fp16, y = v_95_cast_fp16)[name = string("op_2172_cast_fp16")];
+            tensor<int32, [4]> var_2173 = const()[name = string("op_2173"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_215x = const()[name = string("concat_215x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2174_cast_fp16 = transpose(perm = var_2173, x = var_2172_cast_fp16)[name = string("transpose_565")];
+            tensor<fp16, [1, ?, 1280]> x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2174_cast_fp16)[name = string("x_169_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2178_to_fp16 = const()[name = string("op_2178_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561264256)))];
+            tensor<fp16, [1280]> var_2179_to_fp16 = const()[name = string("op_2179_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564541120)))];
+            tensor<fp16, [1, ?, 1280]> linear_75_cast_fp16 = linear(bias = var_2179_to_fp16, weight = var_2178_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")];
+            tensor<int32, [1]> var_2186_axes_0 = const()[name = string("op_2186_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564543744)))];
+            tensor<fp16, [1280]> blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564546368)))];
+            tensor<fp16, [1, ?, 1280]> var_2186_cast_fp16 = layer_norm(axes = var_2186_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2186_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2195_to_fp16 = const()[name = string("op_2195_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564548992)))];
+            tensor<fp16, [1280]> var_2196_to_fp16 = const()[name = string("op_2196_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567825856)))];
+            tensor<fp16, [1, ?, 1280]> linear_76_cast_fp16 = linear(bias = var_2196_to_fp16, weight = var_2195_to_fp16, x = var_2186_cast_fp16)[name = string("linear_76_cast_fp16")];
+            tensor<int32, [3]> concat_216 = const()[name = string("concat_216"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_217 = const()[name = string("concat_217"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_218 = const()[name = string("concat_218"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_219 = const()[name = string("concat_219"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_220x = const()[name = string("concat_220x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2216_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2216_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_79_cast_fp16 = mul(x = var_2216_cast_fp16, y = const_198_to_fp16)[name = string("q_79_cast_fp16")];
+            tensor<int32, [4]> var_2222 = const()[name = string("op_2222"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2223_cast_fp16 = reshape(shape = var_2222, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2223_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_99_cast_fp16 = mul(x = var_2223_cast_fp16, y = const_199_to_fp16)[name = string("k_99_cast_fp16")];
+            tensor<int32, [4]> var_2229 = const()[name = string("op_2229"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2230_cast_fp16 = reshape(shape = var_2229, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2230_cast_fp16")];
+            tensor<int32, [4]> var_2231 = const()[name = string("op_2231"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)];
+            bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_295_perm_0 = const()[name = string("transpose_295_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_296_perm_0 = const()[name = string("transpose_296_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_296 = transpose(perm = transpose_296_perm_0, x = k_99_cast_fp16)[name = string("transpose_562")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_295 = transpose(perm = transpose_295_perm_0, x = q_79_cast_fp16)[name = string("transpose_563")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_295, y = transpose_296)[name = string("qk_59_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2235_cast_fp16 = softmax(axis = var_2079, x = qk_59_cast_fp16)[name = string("op_2235_cast_fp16")];
+            bool var_2237_transpose_x_0 = const()[name = string("op_2237_transpose_x_0"), val = bool(false)];
+            bool var_2237_transpose_y_0 = const()[name = string("op_2237_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_99_cast_fp16 = transpose(perm = var_2231, x = var_2230_cast_fp16)[name = string("transpose_564")];
+            tensor<fp16, [1, 20, ?, 64]> var_2237_cast_fp16 = matmul(transpose_x = var_2237_transpose_x_0, transpose_y = var_2237_transpose_y_0, x = var_2235_cast_fp16, y = v_99_cast_fp16)[name = string("op_2237_cast_fp16")];
+            tensor<int32, [4]> var_2238 = const()[name = string("op_2238"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_221x = const()[name = string("concat_221x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2239_cast_fp16 = transpose(perm = var_2238, x = var_2237_cast_fp16)[name = string("transpose_561")];
+            tensor<fp16, [1, ?, 1280]> x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2239_cast_fp16)[name = string("x_175_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2243_to_fp16 = const()[name = string("op_2243_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567828480)))];
+            tensor<fp16, [1280]> var_2244_to_fp16 = const()[name = string("op_2244_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571105344)))];
+            tensor<fp16, [1, ?, 1280]> linear_77_cast_fp16 = linear(bias = var_2244_to_fp16, weight = var_2243_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")];
+            tensor<int32, [1]> var_2251_axes_0 = const()[name = string("op_2251_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571107968)))];
+            tensor<fp16, [1280]> blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571110592)))];
+            tensor<fp16, [1, ?, 1280]> var_2251_cast_fp16 = layer_norm(axes = var_2251_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2251_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2260_to_fp16 = const()[name = string("op_2260_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571113216)))];
+            tensor<fp16, [5120]> var_2261_to_fp16 = const()[name = string("op_2261_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584220480)))];
+            tensor<fp16, [1, ?, 5120]> linear_78_cast_fp16 = linear(bias = var_2261_to_fp16, weight = var_2260_to_fp16, x = var_2251_cast_fp16)[name = string("linear_78_cast_fp16")];
+            string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2266_to_fp16 = const()[name = string("op_2266_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584230784)))];
+            tensor<fp16, [1280]> var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597338048)))];
+            tensor<fp16, [1, ?, 1280]> linear_79_cast_fp16 = linear(bias = var_2267_to_fp16, weight = var_2266_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")];
+            tensor<int32, [4]> k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor<int32, [4]>([11, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_82)[name = string("k_cache_41_cast_fp16")];
+            tensor<int32, [4]> v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor<int32, [4]>([11, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_83)[name = string("v_cache_41_cast_fp16")];
+            tensor<int32, [4]> k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor<int32, [4]>([11, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")];
+            tensor<int32, [4]> v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor<int32, [4]>([11, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")];
+            int32 var_2290 = const()[name = string("op_2290"), val = int32(-1)];
+            tensor<int32, [1]> var_2308_axes_0 = const()[name = string("op_2308_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597340672)))];
+            tensor<fp16, [1280]> blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597343296)))];
+            fp16 var_2296_to_fp16 = const()[name = string("op_2296_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2308_cast_fp16 = layer_norm(axes = var_2308_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2308_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2319_to_fp16 = const()[name = string("op_2319_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597345920)))];
+            tensor<fp16, [1280]> var_2320_to_fp16 = const()[name = string("op_2320_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600622784)))];
+            tensor<fp16, [1, ?, 1280]> linear_80_cast_fp16 = linear(bias = var_2320_to_fp16, weight = var_2319_to_fp16, x = var_2308_cast_fp16)[name = string("linear_80_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2323_to_fp16 = const()[name = string("op_2323_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600625408)))];
+            tensor<fp16, [1, ?, 1280]> linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2323_to_fp16, x = var_2308_cast_fp16)[name = string("linear_81_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2327_to_fp16 = const()[name = string("op_2327_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(603902272)))];
+            tensor<fp16, [1280]> var_2328_to_fp16 = const()[name = string("op_2328_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607179136)))];
+            tensor<fp16, [1, ?, 1280]> linear_82_cast_fp16 = linear(bias = var_2328_to_fp16, weight = var_2327_to_fp16, x = var_2308_cast_fp16)[name = string("linear_82_cast_fp16")];
+            tensor<int32, [3]> var_2330_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2330_shape_cast_fp16")];
+            int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)];
+            int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)];
+            bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)];
+            string var_2330_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2330_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2330_shape_cast_fp16_to_uint16 = cast(dtype = var_2330_shape_cast_fp16_to_uint16_dtype_0, x = var_2330_shape_cast_fp16)[name = string("cast_370")];
+            uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2330_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")];
+            string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_369")];
+            int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")];
+            tensor<int32, [1]> expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")];
+            tensor<int32, [1]> concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor<int32, [1]>([10])];
+            int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)];
+            bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")];
+            tensor<int32, [1]> concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)];
+            bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_82)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_84_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_84 = read_state(input = k_cache1)[name = string("coreml_update_state_84")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_83)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_85_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_85 = read_state(input = v_cache1)[name = string("coreml_update_state_85")];
+            int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)];
+            int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(1280)];
+            int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)];
+            bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")];
+            tensor<int32, [3]> var_2346_begin_0 = const()[name = string("op_2346_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2346_end_mask_0 = const()[name = string("op_2346_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = concat_230, end_mask = var_2346_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2346_cast_fp16")];
+            tensor<int32, [3]> var_2349_begin_0 = const()[name = string("op_2349_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2349_end_mask_0 = const()[name = string("op_2349_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2349_cast_fp16 = slice_by_index(begin = var_2349_begin_0, end = concat_230, end_mask = var_2349_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2349_cast_fp16")];
+            tensor<int32, [4]> concat_232x = const()[name = string("concat_232x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2359_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2359_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_83_cast_fp16 = mul(x = var_2359_cast_fp16, y = const_200_to_fp16)[name = string("q_83_cast_fp16")];
+            tensor<int32, [4]> concat_233x = const()[name = string("concat_233x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2366_cast_fp16 = reshape(shape = concat_233x, x = var_2346_cast_fp16)[name = string("op_2366_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_105_cast_fp16 = mul(x = var_2366_cast_fp16, y = const_201_to_fp16)[name = string("k_105_cast_fp16")];
+            tensor<int32, [4]> concat_234x = const()[name = string("concat_234x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2373_cast_fp16 = reshape(shape = concat_234x, x = var_2349_cast_fp16)[name = string("op_2373_cast_fp16")];
+            tensor<int32, [4]> var_2374 = const()[name = string("op_2374"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)];
+            bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_297_perm_0 = const()[name = string("transpose_297_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_298_perm_0 = const()[name = string("transpose_298_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_298 = transpose(perm = transpose_298_perm_0, x = k_105_cast_fp16)[name = string("transpose_558")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_297 = transpose(perm = transpose_297_perm_0, x = q_83_cast_fp16)[name = string("transpose_559")];
+            tensor<fp16, [1, 20, ?, ?]> qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_297, y = transpose_298)[name = string("qk_61_cast_fp16")];
+            int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)];
+            int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)];
+            bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")];
+            tensor<int32, [2]> var_2377_begin_0 = const()[name = string("op_2377_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2377_end_mask_0 = const()[name = string("op_2377_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2377_cast_fp16 = slice_by_index(begin = var_2377_begin_0, end = concat_235, end_mask = var_2377_end_mask_0, x = mask_to_fp16)[name = string("op_2377_cast_fp16")];
+            int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)];
+            int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)];
+            bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")];
+            tensor<int32, [2]> var_2378_begin_0 = const()[name = string("op_2378_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2378_end_mask_0 = const()[name = string("op_2378_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = concat_236, end_mask = var_2378_end_mask_0, x = var_2377_cast_fp16)[name = string("op_2378_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2378_cast_fp16)[name = string("qk_63_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_2381_cast_fp16 = softmax(axis = var_2290, x = qk_63_cast_fp16)[name = string("op_2381_cast_fp16")];
+            bool var_2383_transpose_x_0 = const()[name = string("op_2383_transpose_x_0"), val = bool(false)];
+            bool var_2383_transpose_y_0 = const()[name = string("op_2383_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_105_cast_fp16 = transpose(perm = var_2374, x = var_2373_cast_fp16)[name = string("transpose_560")];
+            tensor<fp16, [1, 20, ?, 64]> var_2383_cast_fp16 = matmul(transpose_x = var_2383_transpose_x_0, transpose_y = var_2383_transpose_y_0, x = var_2381_cast_fp16, y = v_105_cast_fp16)[name = string("op_2383_cast_fp16")];
+            tensor<int32, [4]> var_2384 = const()[name = string("op_2384"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_237x = const()[name = string("concat_237x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2385_cast_fp16 = transpose(perm = var_2384, x = var_2383_cast_fp16)[name = string("transpose_557")];
+            tensor<fp16, [1, ?, 1280]> x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2385_cast_fp16)[name = string("x_187_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2389_to_fp16 = const()[name = string("op_2389_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607181760)))];
+            tensor<fp16, [1280]> var_2390_to_fp16 = const()[name = string("op_2390_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610458624)))];
+            tensor<fp16, [1, ?, 1280]> linear_83_cast_fp16 = linear(bias = var_2390_to_fp16, weight = var_2389_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")];
+            tensor<int32, [1]> var_2397_axes_0 = const()[name = string("op_2397_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610461248)))];
+            tensor<fp16, [1280]> blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610463872)))];
+            tensor<fp16, [1, ?, 1280]> var_2397_cast_fp16 = layer_norm(axes = var_2397_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2397_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2406_to_fp16 = const()[name = string("op_2406_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610466496)))];
+            tensor<fp16, [1280]> var_2407_to_fp16 = const()[name = string("op_2407_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613743360)))];
+            tensor<fp16, [1, ?, 1280]> linear_84_cast_fp16 = linear(bias = var_2407_to_fp16, weight = var_2406_to_fp16, x = var_2397_cast_fp16)[name = string("linear_84_cast_fp16")];
+            tensor<int32, [3]> concat_238 = const()[name = string("concat_238"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_239 = const()[name = string("concat_239"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_240 = const()[name = string("concat_240"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_241 = const()[name = string("concat_241"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_242x = const()[name = string("concat_242x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2427_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2427_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_87_cast_fp16 = mul(x = var_2427_cast_fp16, y = const_202_to_fp16)[name = string("q_87_cast_fp16")];
+            tensor<int32, [4]> var_2433 = const()[name = string("op_2433"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2434_cast_fp16 = reshape(shape = var_2433, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2434_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_109_cast_fp16 = mul(x = var_2434_cast_fp16, y = const_203_to_fp16)[name = string("k_109_cast_fp16")];
+            tensor<int32, [4]> var_2440 = const()[name = string("op_2440"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2441_cast_fp16 = reshape(shape = var_2440, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2441_cast_fp16")];
+            tensor<int32, [4]> var_2442 = const()[name = string("op_2442"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)];
+            bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_299_perm_0 = const()[name = string("transpose_299_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_300_perm_0 = const()[name = string("transpose_300_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_300 = transpose(perm = transpose_300_perm_0, x = k_109_cast_fp16)[name = string("transpose_554")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_299 = transpose(perm = transpose_299_perm_0, x = q_87_cast_fp16)[name = string("transpose_555")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_299, y = transpose_300)[name = string("qk_65_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2446_cast_fp16 = softmax(axis = var_2290, x = qk_65_cast_fp16)[name = string("op_2446_cast_fp16")];
+            bool var_2448_transpose_x_0 = const()[name = string("op_2448_transpose_x_0"), val = bool(false)];
+            bool var_2448_transpose_y_0 = const()[name = string("op_2448_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_109_cast_fp16 = transpose(perm = var_2442, x = var_2441_cast_fp16)[name = string("transpose_556")];
+            tensor<fp16, [1, 20, ?, 64]> var_2448_cast_fp16 = matmul(transpose_x = var_2448_transpose_x_0, transpose_y = var_2448_transpose_y_0, x = var_2446_cast_fp16, y = v_109_cast_fp16)[name = string("op_2448_cast_fp16")];
+            tensor<int32, [4]> var_2449 = const()[name = string("op_2449"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_243x = const()[name = string("concat_243x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2450_cast_fp16 = transpose(perm = var_2449, x = var_2448_cast_fp16)[name = string("transpose_553")];
+            tensor<fp16, [1, ?, 1280]> x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2450_cast_fp16)[name = string("x_193_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2454_to_fp16 = const()[name = string("op_2454_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613745984)))];
+            tensor<fp16, [1280]> var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617022848)))];
+            tensor<fp16, [1, ?, 1280]> linear_85_cast_fp16 = linear(bias = var_2455_to_fp16, weight = var_2454_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")];
+            tensor<int32, [1]> var_2462_axes_0 = const()[name = string("op_2462_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617025472)))];
+            tensor<fp16, [1280]> blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617028096)))];
+            tensor<fp16, [1, ?, 1280]> var_2462_cast_fp16 = layer_norm(axes = var_2462_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2462_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2471_to_fp16 = const()[name = string("op_2471_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617030720)))];
+            tensor<fp16, [5120]> var_2472_to_fp16 = const()[name = string("op_2472_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630137984)))];
+            tensor<fp16, [1, ?, 5120]> linear_86_cast_fp16 = linear(bias = var_2472_to_fp16, weight = var_2471_to_fp16, x = var_2462_cast_fp16)[name = string("linear_86_cast_fp16")];
+            string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630148288)))];
+            tensor<fp16, [1280]> var_2478_to_fp16 = const()[name = string("op_2478_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643255552)))];
+            tensor<fp16, [1, ?, 1280]> linear_87_cast_fp16 = linear(bias = var_2478_to_fp16, weight = var_2477_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")];
+            tensor<int32, [4]> k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor<int32, [4]>([12, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_84)[name = string("k_cache_45_cast_fp16")];
+            tensor<int32, [4]> v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor<int32, [4]>([12, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_85)[name = string("v_cache_45_cast_fp16")];
+            tensor<int32, [4]> k_cache_47_begin_0 = const()[name = string("k_cache_47_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_47_end_0 = const()[name = string("k_cache_47_end_0"), val = tensor<int32, [4]>([12, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_47_end_mask_0 = const()[name = string("k_cache_47_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_47_squeeze_mask_0 = const()[name = string("k_cache_47_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_47_cast_fp16 = slice_by_index(begin = k_cache_47_begin_0, end = k_cache_47_end_0, end_mask = k_cache_47_end_mask_0, squeeze_mask = k_cache_47_squeeze_mask_0, x = read_state_2)[name = string("k_cache_47_cast_fp16")];
+            tensor<int32, [4]> v_cache_47_begin_0 = const()[name = string("v_cache_47_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_47_end_0 = const()[name = string("v_cache_47_end_0"), val = tensor<int32, [4]>([12, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_47_end_mask_0 = const()[name = string("v_cache_47_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_47_squeeze_mask_0 = const()[name = string("v_cache_47_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_47_cast_fp16 = slice_by_index(begin = v_cache_47_begin_0, end = v_cache_47_end_0, end_mask = v_cache_47_end_mask_0, squeeze_mask = v_cache_47_squeeze_mask_0, x = read_state_3)[name = string("v_cache_47_cast_fp16")];
+            int32 var_2501 = const()[name = string("op_2501"), val = int32(-1)];
+            tensor<int32, [1]> var_2519_axes_0 = const()[name = string("op_2519_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643258176)))];
+            tensor<fp16, [1280]> blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643260800)))];
+            fp16 var_2507_to_fp16 = const()[name = string("op_2507_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2519_cast_fp16 = layer_norm(axes = var_2519_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2519_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2530_to_fp16 = const()[name = string("op_2530_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643263424)))];
+            tensor<fp16, [1280]> var_2531_to_fp16 = const()[name = string("op_2531_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646540288)))];
+            tensor<fp16, [1, ?, 1280]> linear_88_cast_fp16 = linear(bias = var_2531_to_fp16, weight = var_2530_to_fp16, x = var_2519_cast_fp16)[name = string("linear_88_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2534_to_fp16 = const()[name = string("op_2534_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646542912)))];
+            tensor<fp16, [1, ?, 1280]> linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2534_to_fp16, x = var_2519_cast_fp16)[name = string("linear_89_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649819776)))];
+            tensor<fp16, [1280]> var_2539_to_fp16 = const()[name = string("op_2539_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653096640)))];
+            tensor<fp16, [1, ?, 1280]> linear_90_cast_fp16 = linear(bias = var_2539_to_fp16, weight = var_2538_to_fp16, x = var_2519_cast_fp16)[name = string("linear_90_cast_fp16")];
+            tensor<int32, [3]> var_2541_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2541_shape_cast_fp16")];
+            int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)];
+            int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)];
+            bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)];
+            string var_2541_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2541_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2541_shape_cast_fp16_to_uint16 = cast(dtype = var_2541_shape_cast_fp16_to_uint16_dtype_0, x = var_2541_shape_cast_fp16)[name = string("cast_368")];
+            uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2541_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")];
+            string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_367")];
+            int32 end_step_25 = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step_25")];
+            tensor<int32, [1]> expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step_25)[name = string("expand_dims_179")];
+            tensor<int32, [1]> concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor<int32, [1]>([11])];
+            int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)];
+            bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")];
+            tensor<int32, [1]> concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)];
+            bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_84)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_86_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_86 = read_state(input = k_cache1)[name = string("coreml_update_state_86")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_85)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_87_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_87 = read_state(input = v_cache1)[name = string("coreml_update_state_87")];
+            int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)];
+            int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(1280)];
+            int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)];
+            bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step_25, concat_252_values2_0))[name = string("concat_252")];
+            tensor<int32, [3]> var_2557_begin_0 = const()[name = string("op_2557_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2557_end_mask_0 = const()[name = string("op_2557_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = concat_252, end_mask = var_2557_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2557_cast_fp16")];
+            tensor<int32, [3]> var_2560_begin_0 = const()[name = string("op_2560_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2560_end_mask_0 = const()[name = string("op_2560_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2560_cast_fp16 = slice_by_index(begin = var_2560_begin_0, end = concat_252, end_mask = var_2560_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2560_cast_fp16")];
+            tensor<int32, [4]> concat_254x = const()[name = string("concat_254x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2570_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2570_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_91_cast_fp16 = mul(x = var_2570_cast_fp16, y = const_204_to_fp16)[name = string("q_91_cast_fp16")];
+            tensor<int32, [4]> concat_255x = const()[name = string("concat_255x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2577_cast_fp16 = reshape(shape = concat_255x, x = var_2557_cast_fp16)[name = string("op_2577_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_115_cast_fp16 = mul(x = var_2577_cast_fp16, y = const_205_to_fp16)[name = string("k_115_cast_fp16")];
+            tensor<int32, [4]> concat_256x = const()[name = string("concat_256x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2584_cast_fp16 = reshape(shape = concat_256x, x = var_2560_cast_fp16)[name = string("op_2584_cast_fp16")];
+            tensor<int32, [4]> var_2585 = const()[name = string("op_2585"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)];
+            bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_301_perm_0 = const()[name = string("transpose_301_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_302_perm_0 = const()[name = string("transpose_302_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_302 = transpose(perm = transpose_302_perm_0, x = k_115_cast_fp16)[name = string("transpose_550")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_301 = transpose(perm = transpose_301_perm_0, x = q_91_cast_fp16)[name = string("transpose_551")];
+            tensor<fp16, [1, 20, ?, ?]> qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_301, y = transpose_302)[name = string("qk_67_cast_fp16")];
+            int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)];
+            int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)];
+            bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")];
+            tensor<int32, [2]> var_2588_begin_0 = const()[name = string("op_2588_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2588_end_mask_0 = const()[name = string("op_2588_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2588_cast_fp16 = slice_by_index(begin = var_2588_begin_0, end = concat_257, end_mask = var_2588_end_mask_0, x = mask_to_fp16)[name = string("op_2588_cast_fp16")];
+            int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)];
+            int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)];
+            bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")];
+            tensor<int32, [2]> var_2589_begin_0 = const()[name = string("op_2589_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2589_end_mask_0 = const()[name = string("op_2589_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2589_cast_fp16 = slice_by_index(begin = var_2589_begin_0, end = concat_258, end_mask = var_2589_end_mask_0, x = var_2588_cast_fp16)[name = string("op_2589_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2589_cast_fp16)[name = string("qk_69_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_2592_cast_fp16 = softmax(axis = var_2501, x = qk_69_cast_fp16)[name = string("op_2592_cast_fp16")];
+            bool var_2594_transpose_x_0 = const()[name = string("op_2594_transpose_x_0"), val = bool(false)];
+            bool var_2594_transpose_y_0 = const()[name = string("op_2594_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_115_cast_fp16 = transpose(perm = var_2585, x = var_2584_cast_fp16)[name = string("transpose_552")];
+            tensor<fp16, [1, 20, ?, 64]> var_2594_cast_fp16 = matmul(transpose_x = var_2594_transpose_x_0, transpose_y = var_2594_transpose_y_0, x = var_2592_cast_fp16, y = v_115_cast_fp16)[name = string("op_2594_cast_fp16")];
+            tensor<int32, [4]> var_2595 = const()[name = string("op_2595"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_259x = const()[name = string("concat_259x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2596_cast_fp16 = transpose(perm = var_2595, x = var_2594_cast_fp16)[name = string("transpose_549")];
+            tensor<fp16, [1, ?, 1280]> x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2596_cast_fp16)[name = string("x_205_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2600_to_fp16 = const()[name = string("op_2600_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653099264)))];
+            tensor<fp16, [1280]> var_2601_to_fp16 = const()[name = string("op_2601_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656376128)))];
+            tensor<fp16, [1, ?, 1280]> linear_91_cast_fp16 = linear(bias = var_2601_to_fp16, weight = var_2600_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")];
+            tensor<int32, [1]> var_2608_axes_0 = const()[name = string("op_2608_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656378752)))];
+            tensor<fp16, [1280]> blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656381376)))];
+            tensor<fp16, [1, ?, 1280]> var_2608_cast_fp16 = layer_norm(axes = var_2608_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2608_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2617_to_fp16 = const()[name = string("op_2617_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656384000)))];
+            tensor<fp16, [1280]> var_2618_to_fp16 = const()[name = string("op_2618_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659660864)))];
+            tensor<fp16, [1, ?, 1280]> linear_92_cast_fp16 = linear(bias = var_2618_to_fp16, weight = var_2617_to_fp16, x = var_2608_cast_fp16)[name = string("linear_92_cast_fp16")];
+            tensor<int32, [3]> concat_260 = const()[name = string("concat_260"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_261 = const()[name = string("concat_261"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_262 = const()[name = string("concat_262"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_263 = const()[name = string("concat_263"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_264x = const()[name = string("concat_264x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2638_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2638_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_95_cast_fp16 = mul(x = var_2638_cast_fp16, y = const_206_to_fp16)[name = string("q_95_cast_fp16")];
+            tensor<int32, [4]> var_2644 = const()[name = string("op_2644"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2645_cast_fp16 = reshape(shape = var_2644, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2645_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_119_cast_fp16 = mul(x = var_2645_cast_fp16, y = const_207_to_fp16)[name = string("k_119_cast_fp16")];
+            tensor<int32, [4]> var_2651 = const()[name = string("op_2651"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2652_cast_fp16 = reshape(shape = var_2651, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2652_cast_fp16")];
+            tensor<int32, [4]> var_2653 = const()[name = string("op_2653"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_71_transpose_x_0 = const()[name = string("qk_71_transpose_x_0"), val = bool(false)];
+            bool qk_71_transpose_y_0 = const()[name = string("qk_71_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_303_perm_0 = const()[name = string("transpose_303_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_304_perm_0 = const()[name = string("transpose_304_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_304 = transpose(perm = transpose_304_perm_0, x = k_119_cast_fp16)[name = string("transpose_546")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_303 = transpose(perm = transpose_303_perm_0, x = q_95_cast_fp16)[name = string("transpose_547")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_71_cast_fp16 = matmul(transpose_x = qk_71_transpose_x_0, transpose_y = qk_71_transpose_y_0, x = transpose_303, y = transpose_304)[name = string("qk_71_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2657_cast_fp16 = softmax(axis = var_2501, x = qk_71_cast_fp16)[name = string("op_2657_cast_fp16")];
+            bool var_2659_transpose_x_0 = const()[name = string("op_2659_transpose_x_0"), val = bool(false)];
+            bool var_2659_transpose_y_0 = const()[name = string("op_2659_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_119_cast_fp16 = transpose(perm = var_2653, x = var_2652_cast_fp16)[name = string("transpose_548")];
+            tensor<fp16, [1, 20, ?, 64]> var_2659_cast_fp16 = matmul(transpose_x = var_2659_transpose_x_0, transpose_y = var_2659_transpose_y_0, x = var_2657_cast_fp16, y = v_119_cast_fp16)[name = string("op_2659_cast_fp16")];
+            tensor<int32, [4]> var_2660 = const()[name = string("op_2660"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_265x = const()[name = string("concat_265x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2661_cast_fp16 = transpose(perm = var_2660, x = var_2659_cast_fp16)[name = string("transpose_545")];
+            tensor<fp16, [1, ?, 1280]> x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2661_cast_fp16)[name = string("x_211_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2665_to_fp16 = const()[name = string("op_2665_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659663488)))];
+            tensor<fp16, [1280]> var_2666_to_fp16 = const()[name = string("op_2666_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662940352)))];
+            tensor<fp16, [1, ?, 1280]> linear_93_cast_fp16 = linear(bias = var_2666_to_fp16, weight = var_2665_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")];
+            tensor<int32, [1]> var_2673_axes_0 = const()[name = string("op_2673_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662942976)))];
+            tensor<fp16, [1280]> blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662945600)))];
+            tensor<fp16, [1, ?, 1280]> var_2673_cast_fp16 = layer_norm(axes = var_2673_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2673_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2682_to_fp16 = const()[name = string("op_2682_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662948224)))];
+            tensor<fp16, [5120]> var_2683_to_fp16 = const()[name = string("op_2683_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676055488)))];
+            tensor<fp16, [1, ?, 5120]> linear_94_cast_fp16 = linear(bias = var_2683_to_fp16, weight = var_2682_to_fp16, x = var_2673_cast_fp16)[name = string("linear_94_cast_fp16")];
+            string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2688_to_fp16 = const()[name = string("op_2688_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676065792)))];
+            tensor<fp16, [1280]> var_2689_to_fp16 = const()[name = string("op_2689_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689173056)))];
+            tensor<fp16, [1, ?, 1280]> linear_95_cast_fp16 = linear(bias = var_2689_to_fp16, weight = var_2688_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")];
+            tensor<int32, [4]> k_cache_49_begin_0 = const()[name = string("k_cache_49_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_49_end_0 = const()[name = string("k_cache_49_end_0"), val = tensor<int32, [4]>([13, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_49_end_mask_0 = const()[name = string("k_cache_49_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_49_squeeze_mask_0 = const()[name = string("k_cache_49_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_49_cast_fp16 = slice_by_index(begin = k_cache_49_begin_0, end = k_cache_49_end_0, end_mask = k_cache_49_end_mask_0, squeeze_mask = k_cache_49_squeeze_mask_0, x = coreml_update_state_86)[name = string("k_cache_49_cast_fp16")];
+            tensor<int32, [4]> v_cache_49_begin_0 = const()[name = string("v_cache_49_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_49_end_0 = const()[name = string("v_cache_49_end_0"), val = tensor<int32, [4]>([13, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_49_end_mask_0 = const()[name = string("v_cache_49_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_49_squeeze_mask_0 = const()[name = string("v_cache_49_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_49_cast_fp16 = slice_by_index(begin = v_cache_49_begin_0, end = v_cache_49_end_0, end_mask = v_cache_49_end_mask_0, squeeze_mask = v_cache_49_squeeze_mask_0, x = coreml_update_state_87)[name = string("v_cache_49_cast_fp16")];
+            tensor<int32, [4]> k_cache_51_begin_0 = const()[name = string("k_cache_51_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_51_end_0 = const()[name = string("k_cache_51_end_0"), val = tensor<int32, [4]>([13, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_51_end_mask_0 = const()[name = string("k_cache_51_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_51_squeeze_mask_0 = const()[name = string("k_cache_51_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_51_cast_fp16 = slice_by_index(begin = k_cache_51_begin_0, end = k_cache_51_end_0, end_mask = k_cache_51_end_mask_0, squeeze_mask = k_cache_51_squeeze_mask_0, x = read_state_2)[name = string("k_cache_51_cast_fp16")];
+            tensor<int32, [4]> v_cache_51_begin_0 = const()[name = string("v_cache_51_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_51_end_0 = const()[name = string("v_cache_51_end_0"), val = tensor<int32, [4]>([13, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_51_end_mask_0 = const()[name = string("v_cache_51_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_51_squeeze_mask_0 = const()[name = string("v_cache_51_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_51_cast_fp16 = slice_by_index(begin = v_cache_51_begin_0, end = v_cache_51_end_0, end_mask = v_cache_51_end_mask_0, squeeze_mask = v_cache_51_squeeze_mask_0, x = read_state_3)[name = string("v_cache_51_cast_fp16")];
+            int32 var_2712 = const()[name = string("op_2712"), val = int32(-1)];
+            tensor<int32, [1]> var_2730_axes_0 = const()[name = string("op_2730_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689175680)))];
+            tensor<fp16, [1280]> blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689178304)))];
+            fp16 var_2718_to_fp16 = const()[name = string("op_2718_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2730_cast_fp16 = layer_norm(axes = var_2730_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2730_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689180928)))];
+            tensor<fp16, [1280]> var_2742_to_fp16 = const()[name = string("op_2742_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692457792)))];
+            tensor<fp16, [1, ?, 1280]> linear_96_cast_fp16 = linear(bias = var_2742_to_fp16, weight = var_2741_to_fp16, x = var_2730_cast_fp16)[name = string("linear_96_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2745_to_fp16 = const()[name = string("op_2745_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692460416)))];
+            tensor<fp16, [1, ?, 1280]> linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2745_to_fp16, x = var_2730_cast_fp16)[name = string("linear_97_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2749_to_fp16 = const()[name = string("op_2749_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695737280)))];
+            tensor<fp16, [1280]> var_2750_to_fp16 = const()[name = string("op_2750_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699014144)))];
+            tensor<fp16, [1, ?, 1280]> linear_98_cast_fp16 = linear(bias = var_2750_to_fp16, weight = var_2749_to_fp16, x = var_2730_cast_fp16)[name = string("linear_98_cast_fp16")];
+            tensor<int32, [3]> var_2752_shape_cast_fp16 = shape(x = linear_96_cast_fp16)[name = string("op_2752_shape_cast_fp16")];
+            int32 gather_146_axis_0 = const()[name = string("gather_146_axis_0"), val = int32(0)];
+            int32 gather_146_batch_dims_0 = const()[name = string("gather_146_batch_dims_0"), val = int32(0)];
+            bool gather_146_validate_indices_0 = const()[name = string("gather_146_validate_indices_0"), val = bool(false)];
+            string var_2752_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2752_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_146_to_uint16 = const()[name = string("select_146_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2752_shape_cast_fp16_to_uint16 = cast(dtype = var_2752_shape_cast_fp16_to_uint16_dtype_0, x = var_2752_shape_cast_fp16)[name = string("cast_366")];
+            uint16 gather_146_cast_uint16 = gather(axis = gather_146_axis_0, batch_dims = gather_146_batch_dims_0, indices = select_146_to_uint16, validate_indices = gather_146_validate_indices_0, x = var_2752_shape_cast_fp16_to_uint16)[name = string("gather_146_cast_uint16")];
+            string gather_146_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_146_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_146_cast_uint16_to_int32 = cast(dtype = gather_146_cast_uint16_to_int32_dtype_0, x = gather_146_cast_uint16)[name = string("cast_365")];
+            int32 end_step_27 = add(x = offset, y = gather_146_cast_uint16_to_int32)[name = string("end_step_27")];
+            tensor<int32, [1]> expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = end_step_27)[name = string("expand_dims_195")];
+            tensor<int32, [1]> concat_268_values0_0 = const()[name = string("concat_268_values0_0"), val = tensor<int32, [1]>([12])];
+            int32 concat_268_axis_0 = const()[name = string("concat_268_axis_0"), val = int32(0)];
+            bool concat_268_interleave_0 = const()[name = string("concat_268_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_268 = concat(axis = concat_268_axis_0, interleave = concat_268_interleave_0, values = (concat_268_values0_0, expand_dims_192, expand_dims_1, expand_dims_194))[name = string("concat_268")];
+            tensor<int32, [1]> concat_269_values0_0 = const()[name = string("concat_269_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_269_values1_0 = const()[name = string("concat_269_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_269_values3_0 = const()[name = string("concat_269_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_269_axis_0 = const()[name = string("concat_269_axis_0"), val = int32(0)];
+            bool concat_269_interleave_0 = const()[name = string("concat_269_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_269 = concat(axis = concat_269_axis_0, interleave = concat_269_interleave_0, values = (concat_269_values0_0, concat_269_values1_0, expand_dims_195, concat_269_values3_0))[name = string("concat_269")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = k_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = k_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_13_stride_0, update = linear_97_cast_fp16, x = coreml_update_state_86)[name = string("k_cache1_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_13_cast_fp16, input = k_cache1)[name = string("coreml_update_state_88_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_88 = read_state(input = k_cache1)[name = string("coreml_update_state_88")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = v_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = v_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_13_stride_0, update = linear_98_cast_fp16, x = coreml_update_state_87)[name = string("v_cache1_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_13_cast_fp16, input = v_cache1)[name = string("coreml_update_state_89_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_89 = read_state(input = v_cache1)[name = string("coreml_update_state_89")];
+            int32 concat_274_values0_0 = const()[name = string("concat_274_values0_0"), val = int32(1)];
+            int32 concat_274_values2_0 = const()[name = string("concat_274_values2_0"), val = int32(1280)];
+            int32 concat_274_axis_0 = const()[name = string("concat_274_axis_0"), val = int32(0)];
+            bool concat_274_interleave_0 = const()[name = string("concat_274_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_274 = concat(axis = concat_274_axis_0, interleave = concat_274_interleave_0, values = (concat_274_values0_0, end_step_27, concat_274_values2_0))[name = string("concat_274")];
+            tensor<int32, [3]> var_2768_begin_0 = const()[name = string("op_2768_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2768_end_mask_0 = const()[name = string("op_2768_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2768_cast_fp16 = slice_by_index(begin = var_2768_begin_0, end = concat_274, end_mask = var_2768_end_mask_0, x = k_cache_49_cast_fp16)[name = string("op_2768_cast_fp16")];
+            tensor<int32, [3]> var_2771_begin_0 = const()[name = string("op_2771_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2771_end_mask_0 = const()[name = string("op_2771_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2771_cast_fp16 = slice_by_index(begin = var_2771_begin_0, end = concat_274, end_mask = var_2771_end_mask_0, x = v_cache_49_cast_fp16)[name = string("op_2771_cast_fp16")];
+            tensor<int32, [4]> concat_276x = const()[name = string("concat_276x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2781_cast_fp16 = reshape(shape = concat_276x, x = linear_96_cast_fp16)[name = string("op_2781_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_99_cast_fp16 = mul(x = var_2781_cast_fp16, y = const_208_to_fp16)[name = string("q_99_cast_fp16")];
+            tensor<int32, [4]> concat_277x = const()[name = string("concat_277x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2788_cast_fp16 = reshape(shape = concat_277x, x = var_2768_cast_fp16)[name = string("op_2788_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_125_cast_fp16 = mul(x = var_2788_cast_fp16, y = const_209_to_fp16)[name = string("k_125_cast_fp16")];
+            tensor<int32, [4]> concat_278x = const()[name = string("concat_278x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2795_cast_fp16 = reshape(shape = concat_278x, x = var_2771_cast_fp16)[name = string("op_2795_cast_fp16")];
+            tensor<int32, [4]> var_2796 = const()[name = string("op_2796"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_73_transpose_x_0 = const()[name = string("qk_73_transpose_x_0"), val = bool(false)];
+            bool qk_73_transpose_y_0 = const()[name = string("qk_73_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_305_perm_0 = const()[name = string("transpose_305_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_306_perm_0 = const()[name = string("transpose_306_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_306 = transpose(perm = transpose_306_perm_0, x = k_125_cast_fp16)[name = string("transpose_542")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_305 = transpose(perm = transpose_305_perm_0, x = q_99_cast_fp16)[name = string("transpose_543")];
+            tensor<fp16, [1, 20, ?, ?]> qk_73_cast_fp16 = matmul(transpose_x = qk_73_transpose_x_0, transpose_y = qk_73_transpose_y_0, x = transpose_305, y = transpose_306)[name = string("qk_73_cast_fp16")];
+            int32 concat_279_values1_0 = const()[name = string("concat_279_values1_0"), val = int32(448)];
+            int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)];
+            bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_146_cast_uint16_to_int32, concat_279_values1_0))[name = string("concat_279")];
+            tensor<int32, [2]> var_2799_begin_0 = const()[name = string("op_2799_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2799_end_mask_0 = const()[name = string("op_2799_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2799_cast_fp16 = slice_by_index(begin = var_2799_begin_0, end = concat_279, end_mask = var_2799_end_mask_0, x = mask_to_fp16)[name = string("op_2799_cast_fp16")];
+            int32 concat_280_values0_0 = const()[name = string("concat_280_values0_0"), val = int32(0)];
+            int32 concat_280_axis_0 = const()[name = string("concat_280_axis_0"), val = int32(0)];
+            bool concat_280_interleave_0 = const()[name = string("concat_280_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_280 = concat(axis = concat_280_axis_0, interleave = concat_280_interleave_0, values = (concat_280_values0_0, gather_146_cast_uint16_to_int32))[name = string("concat_280")];
+            tensor<int32, [2]> var_2800_begin_0 = const()[name = string("op_2800_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2800_end_mask_0 = const()[name = string("op_2800_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2800_cast_fp16 = slice_by_index(begin = var_2800_begin_0, end = concat_280, end_mask = var_2800_end_mask_0, x = var_2799_cast_fp16)[name = string("op_2800_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_75_cast_fp16 = add(x = qk_73_cast_fp16, y = var_2800_cast_fp16)[name = string("qk_75_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_2803_cast_fp16 = softmax(axis = var_2712, x = qk_75_cast_fp16)[name = string("op_2803_cast_fp16")];
+            bool var_2805_transpose_x_0 = const()[name = string("op_2805_transpose_x_0"), val = bool(false)];
+            bool var_2805_transpose_y_0 = const()[name = string("op_2805_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_125_cast_fp16 = transpose(perm = var_2796, x = var_2795_cast_fp16)[name = string("transpose_544")];
+            tensor<fp16, [1, 20, ?, 64]> var_2805_cast_fp16 = matmul(transpose_x = var_2805_transpose_x_0, transpose_y = var_2805_transpose_y_0, x = var_2803_cast_fp16, y = v_125_cast_fp16)[name = string("op_2805_cast_fp16")];
+            tensor<int32, [4]> var_2806 = const()[name = string("op_2806"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_281x = const()[name = string("concat_281x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2807_cast_fp16 = transpose(perm = var_2806, x = var_2805_cast_fp16)[name = string("transpose_541")];
+            tensor<fp16, [1, ?, 1280]> x_223_cast_fp16 = reshape(shape = concat_281x, x = var_2807_cast_fp16)[name = string("x_223_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2811_to_fp16 = const()[name = string("op_2811_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699016768)))];
+            tensor<fp16, [1280]> var_2812_to_fp16 = const()[name = string("op_2812_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702293632)))];
+            tensor<fp16, [1, ?, 1280]> linear_99_cast_fp16 = linear(bias = var_2812_to_fp16, weight = var_2811_to_fp16, x = x_223_cast_fp16)[name = string("linear_99_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_225_cast_fp16 = add(x = x_219_cast_fp16, y = linear_99_cast_fp16)[name = string("x_225_cast_fp16")];
+            tensor<int32, [1]> var_2819_axes_0 = const()[name = string("op_2819_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702296256)))];
+            tensor<fp16, [1280]> blocks_12_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702298880)))];
+            tensor<fp16, [1, ?, 1280]> var_2819_cast_fp16 = layer_norm(axes = var_2819_axes_0, beta = blocks_12_cross_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_cross_attn_ln_weight_to_fp16, x = x_225_cast_fp16)[name = string("op_2819_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2828_to_fp16 = const()[name = string("op_2828_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702301504)))];
+            tensor<fp16, [1280]> var_2829_to_fp16 = const()[name = string("op_2829_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705578368)))];
+            tensor<fp16, [1, ?, 1280]> linear_100_cast_fp16 = linear(bias = var_2829_to_fp16, weight = var_2828_to_fp16, x = var_2819_cast_fp16)[name = string("linear_100_cast_fp16")];
+            tensor<int32, [3]> concat_282 = const()[name = string("concat_282"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_283 = const()[name = string("concat_283"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_127_internal_tensor_assign_1_stride_0 = const()[name = string("k_127_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_282, begin_mask = k_127_internal_tensor_assign_1_begin_mask_0, end = concat_283, end_mask = k_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_127_internal_tensor_assign_1_squeeze_mask_0, stride = k_127_internal_tensor_assign_1_stride_0, update = k_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("k_127_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_284 = const()[name = string("concat_284"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_285 = const()[name = string("concat_285"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_127_internal_tensor_assign_1_stride_0 = const()[name = string("v_127_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_284, begin_mask = v_127_internal_tensor_assign_1_begin_mask_0, end = concat_285, end_mask = v_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_127_internal_tensor_assign_1_squeeze_mask_0, stride = v_127_internal_tensor_assign_1_stride_0, update = v_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("v_127_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_286x = const()[name = string("concat_286x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2849_cast_fp16 = reshape(shape = concat_286x, x = linear_100_cast_fp16)[name = string("op_2849_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_103_cast_fp16 = mul(x = var_2849_cast_fp16, y = const_210_to_fp16)[name = string("q_103_cast_fp16")];
+            tensor<int32, [4]> var_2855 = const()[name = string("op_2855"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2856_cast_fp16 = reshape(shape = var_2855, x = k_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2856_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_129_cast_fp16 = mul(x = var_2856_cast_fp16, y = const_211_to_fp16)[name = string("k_129_cast_fp16")];
+            tensor<int32, [4]> var_2862 = const()[name = string("op_2862"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2863_cast_fp16 = reshape(shape = var_2862, x = v_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2863_cast_fp16")];
+            tensor<int32, [4]> var_2864 = const()[name = string("op_2864"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_77_transpose_x_0 = const()[name = string("qk_77_transpose_x_0"), val = bool(false)];
+            bool qk_77_transpose_y_0 = const()[name = string("qk_77_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_307_perm_0 = const()[name = string("transpose_307_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_308_perm_0 = const()[name = string("transpose_308_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_308 = transpose(perm = transpose_308_perm_0, x = k_129_cast_fp16)[name = string("transpose_538")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_307 = transpose(perm = transpose_307_perm_0, x = q_103_cast_fp16)[name = string("transpose_539")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_77_cast_fp16 = matmul(transpose_x = qk_77_transpose_x_0, transpose_y = qk_77_transpose_y_0, x = transpose_307, y = transpose_308)[name = string("qk_77_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2868_cast_fp16 = softmax(axis = var_2712, x = qk_77_cast_fp16)[name = string("op_2868_cast_fp16")];
+            bool var_2870_transpose_x_0 = const()[name = string("op_2870_transpose_x_0"), val = bool(false)];
+            bool var_2870_transpose_y_0 = const()[name = string("op_2870_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_129_cast_fp16 = transpose(perm = var_2864, x = var_2863_cast_fp16)[name = string("transpose_540")];
+            tensor<fp16, [1, 20, ?, 64]> var_2870_cast_fp16 = matmul(transpose_x = var_2870_transpose_x_0, transpose_y = var_2870_transpose_y_0, x = var_2868_cast_fp16, y = v_129_cast_fp16)[name = string("op_2870_cast_fp16")];
+            tensor<int32, [4]> var_2871 = const()[name = string("op_2871"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_287x = const()[name = string("concat_287x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2872_cast_fp16 = transpose(perm = var_2871, x = var_2870_cast_fp16)[name = string("transpose_537")];
+            tensor<fp16, [1, ?, 1280]> x_229_cast_fp16 = reshape(shape = concat_287x, x = var_2872_cast_fp16)[name = string("x_229_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2876_to_fp16 = const()[name = string("op_2876_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705580992)))];
+            tensor<fp16, [1280]> var_2877_to_fp16 = const()[name = string("op_2877_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708857856)))];
+            tensor<fp16, [1, ?, 1280]> linear_101_cast_fp16 = linear(bias = var_2877_to_fp16, weight = var_2876_to_fp16, x = x_229_cast_fp16)[name = string("linear_101_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_231_cast_fp16 = add(x = x_225_cast_fp16, y = linear_101_cast_fp16)[name = string("x_231_cast_fp16")];
+            tensor<int32, [1]> var_2884_axes_0 = const()[name = string("op_2884_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708860480)))];
+            tensor<fp16, [1280]> blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708863104)))];
+            tensor<fp16, [1, ?, 1280]> var_2884_cast_fp16 = layer_norm(axes = var_2884_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_231_cast_fp16)[name = string("op_2884_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2893_to_fp16 = const()[name = string("op_2893_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708865728)))];
+            tensor<fp16, [5120]> var_2894_to_fp16 = const()[name = string("op_2894_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721972992)))];
+            tensor<fp16, [1, ?, 5120]> linear_102_cast_fp16 = linear(bias = var_2894_to_fp16, weight = var_2893_to_fp16, x = var_2884_cast_fp16)[name = string("linear_102_cast_fp16")];
+            string x_235_mode_0 = const()[name = string("x_235_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_235_cast_fp16 = gelu(mode = x_235_mode_0, x = linear_102_cast_fp16)[name = string("x_235_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2899_to_fp16 = const()[name = string("op_2899_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721983296)))];
+            tensor<fp16, [1280]> var_2900_to_fp16 = const()[name = string("op_2900_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735090560)))];
+            tensor<fp16, [1, ?, 1280]> linear_103_cast_fp16 = linear(bias = var_2900_to_fp16, weight = var_2899_to_fp16, x = x_235_cast_fp16)[name = string("linear_103_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_237_cast_fp16 = add(x = x_231_cast_fp16, y = linear_103_cast_fp16)[name = string("x_237_cast_fp16")];
+            tensor<int32, [4]> k_cache_53_begin_0 = const()[name = string("k_cache_53_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_53_end_0 = const()[name = string("k_cache_53_end_0"), val = tensor<int32, [4]>([14, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_53_end_mask_0 = const()[name = string("k_cache_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_53_squeeze_mask_0 = const()[name = string("k_cache_53_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_53_cast_fp16 = slice_by_index(begin = k_cache_53_begin_0, end = k_cache_53_end_0, end_mask = k_cache_53_end_mask_0, squeeze_mask = k_cache_53_squeeze_mask_0, x = coreml_update_state_88)[name = string("k_cache_53_cast_fp16")];
+            tensor<int32, [4]> v_cache_53_begin_0 = const()[name = string("v_cache_53_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_53_end_0 = const()[name = string("v_cache_53_end_0"), val = tensor<int32, [4]>([14, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_53_end_mask_0 = const()[name = string("v_cache_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_53_squeeze_mask_0 = const()[name = string("v_cache_53_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_53_cast_fp16 = slice_by_index(begin = v_cache_53_begin_0, end = v_cache_53_end_0, end_mask = v_cache_53_end_mask_0, squeeze_mask = v_cache_53_squeeze_mask_0, x = coreml_update_state_89)[name = string("v_cache_53_cast_fp16")];
+            tensor<int32, [4]> k_cache_55_begin_0 = const()[name = string("k_cache_55_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_55_end_0 = const()[name = string("k_cache_55_end_0"), val = tensor<int32, [4]>([14, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_55_end_mask_0 = const()[name = string("k_cache_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_55_squeeze_mask_0 = const()[name = string("k_cache_55_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_55_cast_fp16 = slice_by_index(begin = k_cache_55_begin_0, end = k_cache_55_end_0, end_mask = k_cache_55_end_mask_0, squeeze_mask = k_cache_55_squeeze_mask_0, x = read_state_2)[name = string("k_cache_55_cast_fp16")];
+            tensor<int32, [4]> v_cache_55_begin_0 = const()[name = string("v_cache_55_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_55_end_0 = const()[name = string("v_cache_55_end_0"), val = tensor<int32, [4]>([14, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_55_end_mask_0 = const()[name = string("v_cache_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_55_squeeze_mask_0 = const()[name = string("v_cache_55_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_55_cast_fp16 = slice_by_index(begin = v_cache_55_begin_0, end = v_cache_55_end_0, end_mask = v_cache_55_end_mask_0, squeeze_mask = v_cache_55_squeeze_mask_0, x = read_state_3)[name = string("v_cache_55_cast_fp16")];
+            int32 var_2923 = const()[name = string("op_2923"), val = int32(-1)];
+            tensor<int32, [1]> var_2941_axes_0 = const()[name = string("op_2941_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735093184)))];
+            tensor<fp16, [1280]> blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735095808)))];
+            fp16 var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2941_cast_fp16 = layer_norm(axes = var_2941_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_237_cast_fp16)[name = string("op_2941_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735098432)))];
+            tensor<fp16, [1280]> var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738375296)))];
+            tensor<fp16, [1, ?, 1280]> linear_104_cast_fp16 = linear(bias = var_2953_to_fp16, weight = var_2952_to_fp16, x = var_2941_cast_fp16)[name = string("linear_104_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2956_to_fp16 = const()[name = string("op_2956_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738377920)))];
+            tensor<fp16, [1, ?, 1280]> linear_105_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2956_to_fp16, x = var_2941_cast_fp16)[name = string("linear_105_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2960_to_fp16 = const()[name = string("op_2960_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(741654784)))];
+            tensor<fp16, [1280]> var_2961_to_fp16 = const()[name = string("op_2961_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744931648)))];
+            tensor<fp16, [1, ?, 1280]> linear_106_cast_fp16 = linear(bias = var_2961_to_fp16, weight = var_2960_to_fp16, x = var_2941_cast_fp16)[name = string("linear_106_cast_fp16")];
+            tensor<int32, [3]> var_2963_shape_cast_fp16 = shape(x = linear_104_cast_fp16)[name = string("op_2963_shape_cast_fp16")];
+            int32 gather_158_axis_0 = const()[name = string("gather_158_axis_0"), val = int32(0)];
+            int32 gather_158_batch_dims_0 = const()[name = string("gather_158_batch_dims_0"), val = int32(0)];
+            bool gather_158_validate_indices_0 = const()[name = string("gather_158_validate_indices_0"), val = bool(false)];
+            string var_2963_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2963_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_158_to_uint16 = const()[name = string("select_158_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2963_shape_cast_fp16_to_uint16 = cast(dtype = var_2963_shape_cast_fp16_to_uint16_dtype_0, x = var_2963_shape_cast_fp16)[name = string("cast_364")];
+            uint16 gather_158_cast_uint16 = gather(axis = gather_158_axis_0, batch_dims = gather_158_batch_dims_0, indices = select_158_to_uint16, validate_indices = gather_158_validate_indices_0, x = var_2963_shape_cast_fp16_to_uint16)[name = string("gather_158_cast_uint16")];
+            string gather_158_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_158_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_158_cast_uint16_to_int32 = cast(dtype = gather_158_cast_uint16_to_int32_dtype_0, x = gather_158_cast_uint16)[name = string("cast_363")];
+            int32 end_step_29 = add(x = offset, y = gather_158_cast_uint16_to_int32)[name = string("end_step_29")];
+            tensor<int32, [1]> expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = end_step_29)[name = string("expand_dims_211")];
+            tensor<int32, [1]> concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor<int32, [1]>([13])];
+            int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)];
+            bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_208, expand_dims_1, expand_dims_210))[name = string("concat_290")];
+            tensor<int32, [1]> concat_291_values0_0 = const()[name = string("concat_291_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)];
+            bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (concat_291_values0_0, concat_291_values1_0, expand_dims_211, concat_291_values3_0))[name = string("concat_291")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = k_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = k_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_14_stride_0, update = linear_105_cast_fp16, x = coreml_update_state_88)[name = string("k_cache1_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_14_cast_fp16, input = k_cache1)[name = string("coreml_update_state_90_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_90 = read_state(input = k_cache1)[name = string("coreml_update_state_90")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = v_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = v_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_14_stride_0, update = linear_106_cast_fp16, x = coreml_update_state_89)[name = string("v_cache1_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_14_cast_fp16, input = v_cache1)[name = string("coreml_update_state_91_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_91 = read_state(input = v_cache1)[name = string("coreml_update_state_91")];
+            int32 concat_296_values0_0 = const()[name = string("concat_296_values0_0"), val = int32(1)];
+            int32 concat_296_values2_0 = const()[name = string("concat_296_values2_0"), val = int32(1280)];
+            int32 concat_296_axis_0 = const()[name = string("concat_296_axis_0"), val = int32(0)];
+            bool concat_296_interleave_0 = const()[name = string("concat_296_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_296 = concat(axis = concat_296_axis_0, interleave = concat_296_interleave_0, values = (concat_296_values0_0, end_step_29, concat_296_values2_0))[name = string("concat_296")];
+            tensor<int32, [3]> var_2979_begin_0 = const()[name = string("op_2979_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2979_end_mask_0 = const()[name = string("op_2979_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2979_cast_fp16 = slice_by_index(begin = var_2979_begin_0, end = concat_296, end_mask = var_2979_end_mask_0, x = k_cache_53_cast_fp16)[name = string("op_2979_cast_fp16")];
+            tensor<int32, [3]> var_2982_begin_0 = const()[name = string("op_2982_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2982_end_mask_0 = const()[name = string("op_2982_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2982_cast_fp16 = slice_by_index(begin = var_2982_begin_0, end = concat_296, end_mask = var_2982_end_mask_0, x = v_cache_53_cast_fp16)[name = string("op_2982_cast_fp16")];
+            tensor<int32, [4]> concat_298x = const()[name = string("concat_298x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2992_cast_fp16 = reshape(shape = concat_298x, x = linear_104_cast_fp16)[name = string("op_2992_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_107_cast_fp16 = mul(x = var_2992_cast_fp16, y = const_212_to_fp16)[name = string("q_107_cast_fp16")];
+            tensor<int32, [4]> concat_299x = const()[name = string("concat_299x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2999_cast_fp16 = reshape(shape = concat_299x, x = var_2979_cast_fp16)[name = string("op_2999_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_135_cast_fp16 = mul(x = var_2999_cast_fp16, y = const_213_to_fp16)[name = string("k_135_cast_fp16")];
+            tensor<int32, [4]> concat_300x = const()[name = string("concat_300x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3006_cast_fp16 = reshape(shape = concat_300x, x = var_2982_cast_fp16)[name = string("op_3006_cast_fp16")];
+            tensor<int32, [4]> var_3007 = const()[name = string("op_3007"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_79_transpose_x_0 = const()[name = string("qk_79_transpose_x_0"), val = bool(false)];
+            bool qk_79_transpose_y_0 = const()[name = string("qk_79_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_309_perm_0 = const()[name = string("transpose_309_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_310_perm_0 = const()[name = string("transpose_310_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_310 = transpose(perm = transpose_310_perm_0, x = k_135_cast_fp16)[name = string("transpose_534")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_309 = transpose(perm = transpose_309_perm_0, x = q_107_cast_fp16)[name = string("transpose_535")];
+            tensor<fp16, [1, 20, ?, ?]> qk_79_cast_fp16 = matmul(transpose_x = qk_79_transpose_x_0, transpose_y = qk_79_transpose_y_0, x = transpose_309, y = transpose_310)[name = string("qk_79_cast_fp16")];
+            int32 concat_301_values1_0 = const()[name = string("concat_301_values1_0"), val = int32(448)];
+            int32 concat_301_axis_0 = const()[name = string("concat_301_axis_0"), val = int32(0)];
+            bool concat_301_interleave_0 = const()[name = string("concat_301_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_301 = concat(axis = concat_301_axis_0, interleave = concat_301_interleave_0, values = (gather_158_cast_uint16_to_int32, concat_301_values1_0))[name = string("concat_301")];
+            tensor<int32, [2]> var_3010_begin_0 = const()[name = string("op_3010_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3010_end_mask_0 = const()[name = string("op_3010_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3010_cast_fp16 = slice_by_index(begin = var_3010_begin_0, end = concat_301, end_mask = var_3010_end_mask_0, x = mask_to_fp16)[name = string("op_3010_cast_fp16")];
+            int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(0)];
+            int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)];
+            bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, gather_158_cast_uint16_to_int32))[name = string("concat_302")];
+            tensor<int32, [2]> var_3011_begin_0 = const()[name = string("op_3011_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3011_end_mask_0 = const()[name = string("op_3011_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3011_cast_fp16 = slice_by_index(begin = var_3011_begin_0, end = concat_302, end_mask = var_3011_end_mask_0, x = var_3010_cast_fp16)[name = string("op_3011_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_81_cast_fp16 = add(x = qk_79_cast_fp16, y = var_3011_cast_fp16)[name = string("qk_81_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3014_cast_fp16 = softmax(axis = var_2923, x = qk_81_cast_fp16)[name = string("op_3014_cast_fp16")];
+            bool var_3016_transpose_x_0 = const()[name = string("op_3016_transpose_x_0"), val = bool(false)];
+            bool var_3016_transpose_y_0 = const()[name = string("op_3016_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_135_cast_fp16 = transpose(perm = var_3007, x = var_3006_cast_fp16)[name = string("transpose_536")];
+            tensor<fp16, [1, 20, ?, 64]> var_3016_cast_fp16 = matmul(transpose_x = var_3016_transpose_x_0, transpose_y = var_3016_transpose_y_0, x = var_3014_cast_fp16, y = v_135_cast_fp16)[name = string("op_3016_cast_fp16")];
+            tensor<int32, [4]> var_3017 = const()[name = string("op_3017"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_303x = const()[name = string("concat_303x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3018_cast_fp16 = transpose(perm = var_3017, x = var_3016_cast_fp16)[name = string("transpose_533")];
+            tensor<fp16, [1, ?, 1280]> x_241_cast_fp16 = reshape(shape = concat_303x, x = var_3018_cast_fp16)[name = string("x_241_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744934272)))];
+            tensor<fp16, [1280]> var_3023_to_fp16 = const()[name = string("op_3023_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748211136)))];
+            tensor<fp16, [1, ?, 1280]> linear_107_cast_fp16 = linear(bias = var_3023_to_fp16, weight = var_3022_to_fp16, x = x_241_cast_fp16)[name = string("linear_107_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_243_cast_fp16 = add(x = x_237_cast_fp16, y = linear_107_cast_fp16)[name = string("x_243_cast_fp16")];
+            tensor<int32, [1]> var_3030_axes_0 = const()[name = string("op_3030_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748213760)))];
+            tensor<fp16, [1280]> blocks_13_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748216384)))];
+            tensor<fp16, [1, ?, 1280]> var_3030_cast_fp16 = layer_norm(axes = var_3030_axes_0, beta = blocks_13_cross_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_cross_attn_ln_weight_to_fp16, x = x_243_cast_fp16)[name = string("op_3030_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3039_to_fp16 = const()[name = string("op_3039_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748219008)))];
+            tensor<fp16, [1280]> var_3040_to_fp16 = const()[name = string("op_3040_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751495872)))];
+            tensor<fp16, [1, ?, 1280]> linear_108_cast_fp16 = linear(bias = var_3040_to_fp16, weight = var_3039_to_fp16, x = var_3030_cast_fp16)[name = string("linear_108_cast_fp16")];
+            tensor<int32, [3]> concat_304 = const()[name = string("concat_304"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_305 = const()[name = string("concat_305"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_137_internal_tensor_assign_1_stride_0 = const()[name = string("k_137_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_304, begin_mask = k_137_internal_tensor_assign_1_begin_mask_0, end = concat_305, end_mask = k_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_137_internal_tensor_assign_1_squeeze_mask_0, stride = k_137_internal_tensor_assign_1_stride_0, update = k_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("k_137_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_306 = const()[name = string("concat_306"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_307 = const()[name = string("concat_307"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_137_internal_tensor_assign_1_stride_0 = const()[name = string("v_137_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_306, begin_mask = v_137_internal_tensor_assign_1_begin_mask_0, end = concat_307, end_mask = v_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_137_internal_tensor_assign_1_squeeze_mask_0, stride = v_137_internal_tensor_assign_1_stride_0, update = v_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("v_137_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_308x = const()[name = string("concat_308x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3060_cast_fp16 = reshape(shape = concat_308x, x = linear_108_cast_fp16)[name = string("op_3060_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_111_cast_fp16 = mul(x = var_3060_cast_fp16, y = const_214_to_fp16)[name = string("q_111_cast_fp16")];
+            tensor<int32, [4]> var_3066 = const()[name = string("op_3066"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3067_cast_fp16 = reshape(shape = var_3066, x = k_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3067_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_139_cast_fp16 = mul(x = var_3067_cast_fp16, y = const_215_to_fp16)[name = string("k_139_cast_fp16")];
+            tensor<int32, [4]> var_3073 = const()[name = string("op_3073"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3074_cast_fp16 = reshape(shape = var_3073, x = v_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3074_cast_fp16")];
+            tensor<int32, [4]> var_3075 = const()[name = string("op_3075"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_83_transpose_x_0 = const()[name = string("qk_83_transpose_x_0"), val = bool(false)];
+            bool qk_83_transpose_y_0 = const()[name = string("qk_83_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_311_perm_0 = const()[name = string("transpose_311_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_312_perm_0 = const()[name = string("transpose_312_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_312 = transpose(perm = transpose_312_perm_0, x = k_139_cast_fp16)[name = string("transpose_530")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_311 = transpose(perm = transpose_311_perm_0, x = q_111_cast_fp16)[name = string("transpose_531")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_83_cast_fp16 = matmul(transpose_x = qk_83_transpose_x_0, transpose_y = qk_83_transpose_y_0, x = transpose_311, y = transpose_312)[name = string("qk_83_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3079_cast_fp16 = softmax(axis = var_2923, x = qk_83_cast_fp16)[name = string("op_3079_cast_fp16")];
+            bool var_3081_transpose_x_0 = const()[name = string("op_3081_transpose_x_0"), val = bool(false)];
+            bool var_3081_transpose_y_0 = const()[name = string("op_3081_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_139_cast_fp16 = transpose(perm = var_3075, x = var_3074_cast_fp16)[name = string("transpose_532")];
+            tensor<fp16, [1, 20, ?, 64]> var_3081_cast_fp16 = matmul(transpose_x = var_3081_transpose_x_0, transpose_y = var_3081_transpose_y_0, x = var_3079_cast_fp16, y = v_139_cast_fp16)[name = string("op_3081_cast_fp16")];
+            tensor<int32, [4]> var_3082 = const()[name = string("op_3082"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_309x = const()[name = string("concat_309x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3083_cast_fp16 = transpose(perm = var_3082, x = var_3081_cast_fp16)[name = string("transpose_529")];
+            tensor<fp16, [1, ?, 1280]> x_247_cast_fp16 = reshape(shape = concat_309x, x = var_3083_cast_fp16)[name = string("x_247_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3087_to_fp16 = const()[name = string("op_3087_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751498496)))];
+            tensor<fp16, [1280]> var_3088_to_fp16 = const()[name = string("op_3088_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754775360)))];
+            tensor<fp16, [1, ?, 1280]> linear_109_cast_fp16 = linear(bias = var_3088_to_fp16, weight = var_3087_to_fp16, x = x_247_cast_fp16)[name = string("linear_109_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_249_cast_fp16 = add(x = x_243_cast_fp16, y = linear_109_cast_fp16)[name = string("x_249_cast_fp16")];
+            tensor<int32, [1]> var_3095_axes_0 = const()[name = string("op_3095_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754777984)))];
+            tensor<fp16, [1280]> blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754780608)))];
+            tensor<fp16, [1, ?, 1280]> var_3095_cast_fp16 = layer_norm(axes = var_3095_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_249_cast_fp16)[name = string("op_3095_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3104_to_fp16 = const()[name = string("op_3104_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754783232)))];
+            tensor<fp16, [5120]> var_3105_to_fp16 = const()[name = string("op_3105_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767890496)))];
+            tensor<fp16, [1, ?, 5120]> linear_110_cast_fp16 = linear(bias = var_3105_to_fp16, weight = var_3104_to_fp16, x = var_3095_cast_fp16)[name = string("linear_110_cast_fp16")];
+            string x_253_mode_0 = const()[name = string("x_253_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_253_cast_fp16 = gelu(mode = x_253_mode_0, x = linear_110_cast_fp16)[name = string("x_253_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3110_to_fp16 = const()[name = string("op_3110_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767900800)))];
+            tensor<fp16, [1280]> var_3111_to_fp16 = const()[name = string("op_3111_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781008064)))];
+            tensor<fp16, [1, ?, 1280]> linear_111_cast_fp16 = linear(bias = var_3111_to_fp16, weight = var_3110_to_fp16, x = x_253_cast_fp16)[name = string("linear_111_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_255_cast_fp16 = add(x = x_249_cast_fp16, y = linear_111_cast_fp16)[name = string("x_255_cast_fp16")];
+            tensor<int32, [4]> k_cache_57_begin_0 = const()[name = string("k_cache_57_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_57_end_0 = const()[name = string("k_cache_57_end_0"), val = tensor<int32, [4]>([15, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_57_end_mask_0 = const()[name = string("k_cache_57_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_57_squeeze_mask_0 = const()[name = string("k_cache_57_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_57_cast_fp16 = slice_by_index(begin = k_cache_57_begin_0, end = k_cache_57_end_0, end_mask = k_cache_57_end_mask_0, squeeze_mask = k_cache_57_squeeze_mask_0, x = coreml_update_state_90)[name = string("k_cache_57_cast_fp16")];
+            tensor<int32, [4]> v_cache_57_begin_0 = const()[name = string("v_cache_57_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_57_end_0 = const()[name = string("v_cache_57_end_0"), val = tensor<int32, [4]>([15, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_57_end_mask_0 = const()[name = string("v_cache_57_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_57_squeeze_mask_0 = const()[name = string("v_cache_57_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_57_cast_fp16 = slice_by_index(begin = v_cache_57_begin_0, end = v_cache_57_end_0, end_mask = v_cache_57_end_mask_0, squeeze_mask = v_cache_57_squeeze_mask_0, x = coreml_update_state_91)[name = string("v_cache_57_cast_fp16")];
+            tensor<int32, [4]> k_cache_59_begin_0 = const()[name = string("k_cache_59_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_59_end_0 = const()[name = string("k_cache_59_end_0"), val = tensor<int32, [4]>([15, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_59_end_mask_0 = const()[name = string("k_cache_59_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_59_squeeze_mask_0 = const()[name = string("k_cache_59_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_59_cast_fp16 = slice_by_index(begin = k_cache_59_begin_0, end = k_cache_59_end_0, end_mask = k_cache_59_end_mask_0, squeeze_mask = k_cache_59_squeeze_mask_0, x = read_state_2)[name = string("k_cache_59_cast_fp16")];
+            tensor<int32, [4]> v_cache_59_begin_0 = const()[name = string("v_cache_59_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_59_end_0 = const()[name = string("v_cache_59_end_0"), val = tensor<int32, [4]>([15, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_59_end_mask_0 = const()[name = string("v_cache_59_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_59_squeeze_mask_0 = const()[name = string("v_cache_59_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_59_cast_fp16 = slice_by_index(begin = v_cache_59_begin_0, end = v_cache_59_end_0, end_mask = v_cache_59_end_mask_0, squeeze_mask = v_cache_59_squeeze_mask_0, x = read_state_3)[name = string("v_cache_59_cast_fp16")];
+            int32 var_3134 = const()[name = string("op_3134"), val = int32(-1)];
+            tensor<int32, [1]> var_3152_axes_0 = const()[name = string("op_3152_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781010688)))];
+            tensor<fp16, [1280]> blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781013312)))];
+            fp16 var_3140_to_fp16 = const()[name = string("op_3140_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3152_cast_fp16 = layer_norm(axes = var_3152_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_255_cast_fp16)[name = string("op_3152_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3163_to_fp16 = const()[name = string("op_3163_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781015936)))];
+            tensor<fp16, [1280]> var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784292800)))];
+            tensor<fp16, [1, ?, 1280]> linear_112_cast_fp16 = linear(bias = var_3164_to_fp16, weight = var_3163_to_fp16, x = var_3152_cast_fp16)[name = string("linear_112_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3167_to_fp16 = const()[name = string("op_3167_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784295424)))];
+            tensor<fp16, [1, ?, 1280]> linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3167_to_fp16, x = var_3152_cast_fp16)[name = string("linear_113_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3171_to_fp16 = const()[name = string("op_3171_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787572288)))];
+            tensor<fp16, [1280]> var_3172_to_fp16 = const()[name = string("op_3172_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790849152)))];
+            tensor<fp16, [1, ?, 1280]> linear_114_cast_fp16 = linear(bias = var_3172_to_fp16, weight = var_3171_to_fp16, x = var_3152_cast_fp16)[name = string("linear_114_cast_fp16")];
+            tensor<int32, [3]> var_3174_shape_cast_fp16 = shape(x = linear_112_cast_fp16)[name = string("op_3174_shape_cast_fp16")];
+            int32 gather_170_axis_0 = const()[name = string("gather_170_axis_0"), val = int32(0)];
+            int32 gather_170_batch_dims_0 = const()[name = string("gather_170_batch_dims_0"), val = int32(0)];
+            bool gather_170_validate_indices_0 = const()[name = string("gather_170_validate_indices_0"), val = bool(false)];
+            string var_3174_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3174_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_170_to_uint16 = const()[name = string("select_170_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3174_shape_cast_fp16_to_uint16 = cast(dtype = var_3174_shape_cast_fp16_to_uint16_dtype_0, x = var_3174_shape_cast_fp16)[name = string("cast_362")];
+            uint16 gather_170_cast_uint16 = gather(axis = gather_170_axis_0, batch_dims = gather_170_batch_dims_0, indices = select_170_to_uint16, validate_indices = gather_170_validate_indices_0, x = var_3174_shape_cast_fp16_to_uint16)[name = string("gather_170_cast_uint16")];
+            string gather_170_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_170_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_170_cast_uint16_to_int32 = cast(dtype = gather_170_cast_uint16_to_int32_dtype_0, x = gather_170_cast_uint16)[name = string("cast_361")];
+            int32 end_step_31 = add(x = offset, y = gather_170_cast_uint16_to_int32)[name = string("end_step_31")];
+            tensor<int32, [1]> expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = end_step_31)[name = string("expand_dims_227")];
+            tensor<int32, [1]> concat_312_values0_0 = const()[name = string("concat_312_values0_0"), val = tensor<int32, [1]>([14])];
+            int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)];
+            bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (concat_312_values0_0, expand_dims_224, expand_dims_1, expand_dims_226))[name = string("concat_312")];
+            tensor<int32, [1]> concat_313_values0_0 = const()[name = string("concat_313_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)];
+            bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (concat_313_values0_0, concat_313_values1_0, expand_dims_227, concat_313_values3_0))[name = string("concat_313")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = k_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = k_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_15_stride_0, update = linear_113_cast_fp16, x = coreml_update_state_90)[name = string("k_cache1_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_15_cast_fp16, input = k_cache1)[name = string("coreml_update_state_92_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_92 = read_state(input = k_cache1)[name = string("coreml_update_state_92")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = v_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = v_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_15_stride_0, update = linear_114_cast_fp16, x = coreml_update_state_91)[name = string("v_cache1_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_15_cast_fp16, input = v_cache1)[name = string("coreml_update_state_93_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_93 = read_state(input = v_cache1)[name = string("coreml_update_state_93")];
+            int32 concat_318_values0_0 = const()[name = string("concat_318_values0_0"), val = int32(1)];
+            int32 concat_318_values2_0 = const()[name = string("concat_318_values2_0"), val = int32(1280)];
+            int32 concat_318_axis_0 = const()[name = string("concat_318_axis_0"), val = int32(0)];
+            bool concat_318_interleave_0 = const()[name = string("concat_318_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_318 = concat(axis = concat_318_axis_0, interleave = concat_318_interleave_0, values = (concat_318_values0_0, end_step_31, concat_318_values2_0))[name = string("concat_318")];
+            tensor<int32, [3]> var_3190_begin_0 = const()[name = string("op_3190_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3190_end_mask_0 = const()[name = string("op_3190_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3190_cast_fp16 = slice_by_index(begin = var_3190_begin_0, end = concat_318, end_mask = var_3190_end_mask_0, x = k_cache_57_cast_fp16)[name = string("op_3190_cast_fp16")];
+            tensor<int32, [3]> var_3193_begin_0 = const()[name = string("op_3193_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3193_end_mask_0 = const()[name = string("op_3193_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3193_cast_fp16 = slice_by_index(begin = var_3193_begin_0, end = concat_318, end_mask = var_3193_end_mask_0, x = v_cache_57_cast_fp16)[name = string("op_3193_cast_fp16")];
+            tensor<int32, [4]> concat_320x = const()[name = string("concat_320x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3203_cast_fp16 = reshape(shape = concat_320x, x = linear_112_cast_fp16)[name = string("op_3203_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_216_to_fp16 = const()[name = string("const_216_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_115_cast_fp16 = mul(x = var_3203_cast_fp16, y = const_216_to_fp16)[name = string("q_115_cast_fp16")];
+            tensor<int32, [4]> concat_321x = const()[name = string("concat_321x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3210_cast_fp16 = reshape(shape = concat_321x, x = var_3190_cast_fp16)[name = string("op_3210_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_217_to_fp16 = const()[name = string("const_217_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_145_cast_fp16 = mul(x = var_3210_cast_fp16, y = const_217_to_fp16)[name = string("k_145_cast_fp16")];
+            tensor<int32, [4]> concat_322x = const()[name = string("concat_322x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3217_cast_fp16 = reshape(shape = concat_322x, x = var_3193_cast_fp16)[name = string("op_3217_cast_fp16")];
+            tensor<int32, [4]> var_3218 = const()[name = string("op_3218"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_85_transpose_x_0 = const()[name = string("qk_85_transpose_x_0"), val = bool(false)];
+            bool qk_85_transpose_y_0 = const()[name = string("qk_85_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_313_perm_0 = const()[name = string("transpose_313_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_314_perm_0 = const()[name = string("transpose_314_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_314 = transpose(perm = transpose_314_perm_0, x = k_145_cast_fp16)[name = string("transpose_526")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_313 = transpose(perm = transpose_313_perm_0, x = q_115_cast_fp16)[name = string("transpose_527")];
+            tensor<fp16, [1, 20, ?, ?]> qk_85_cast_fp16 = matmul(transpose_x = qk_85_transpose_x_0, transpose_y = qk_85_transpose_y_0, x = transpose_313, y = transpose_314)[name = string("qk_85_cast_fp16")];
+            int32 concat_323_values1_0 = const()[name = string("concat_323_values1_0"), val = int32(448)];
+            int32 concat_323_axis_0 = const()[name = string("concat_323_axis_0"), val = int32(0)];
+            bool concat_323_interleave_0 = const()[name = string("concat_323_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_323 = concat(axis = concat_323_axis_0, interleave = concat_323_interleave_0, values = (gather_170_cast_uint16_to_int32, concat_323_values1_0))[name = string("concat_323")];
+            tensor<int32, [2]> var_3221_begin_0 = const()[name = string("op_3221_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3221_end_mask_0 = const()[name = string("op_3221_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3221_cast_fp16 = slice_by_index(begin = var_3221_begin_0, end = concat_323, end_mask = var_3221_end_mask_0, x = mask_to_fp16)[name = string("op_3221_cast_fp16")];
+            int32 concat_324_values0_0 = const()[name = string("concat_324_values0_0"), val = int32(0)];
+            int32 concat_324_axis_0 = const()[name = string("concat_324_axis_0"), val = int32(0)];
+            bool concat_324_interleave_0 = const()[name = string("concat_324_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_324 = concat(axis = concat_324_axis_0, interleave = concat_324_interleave_0, values = (concat_324_values0_0, gather_170_cast_uint16_to_int32))[name = string("concat_324")];
+            tensor<int32, [2]> var_3222_begin_0 = const()[name = string("op_3222_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3222_end_mask_0 = const()[name = string("op_3222_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3222_cast_fp16 = slice_by_index(begin = var_3222_begin_0, end = concat_324, end_mask = var_3222_end_mask_0, x = var_3221_cast_fp16)[name = string("op_3222_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_87_cast_fp16 = add(x = qk_85_cast_fp16, y = var_3222_cast_fp16)[name = string("qk_87_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3225_cast_fp16 = softmax(axis = var_3134, x = qk_87_cast_fp16)[name = string("op_3225_cast_fp16")];
+            bool var_3227_transpose_x_0 = const()[name = string("op_3227_transpose_x_0"), val = bool(false)];
+            bool var_3227_transpose_y_0 = const()[name = string("op_3227_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_145_cast_fp16 = transpose(perm = var_3218, x = var_3217_cast_fp16)[name = string("transpose_528")];
+            tensor<fp16, [1, 20, ?, 64]> var_3227_cast_fp16 = matmul(transpose_x = var_3227_transpose_x_0, transpose_y = var_3227_transpose_y_0, x = var_3225_cast_fp16, y = v_145_cast_fp16)[name = string("op_3227_cast_fp16")];
+            tensor<int32, [4]> var_3228 = const()[name = string("op_3228"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_325x = const()[name = string("concat_325x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3229_cast_fp16 = transpose(perm = var_3228, x = var_3227_cast_fp16)[name = string("transpose_525")];
+            tensor<fp16, [1, ?, 1280]> x_259_cast_fp16 = reshape(shape = concat_325x, x = var_3229_cast_fp16)[name = string("x_259_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3233_to_fp16 = const()[name = string("op_3233_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790851776)))];
+            tensor<fp16, [1280]> var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794128640)))];
+            tensor<fp16, [1, ?, 1280]> linear_115_cast_fp16 = linear(bias = var_3234_to_fp16, weight = var_3233_to_fp16, x = x_259_cast_fp16)[name = string("linear_115_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_261_cast_fp16 = add(x = x_255_cast_fp16, y = linear_115_cast_fp16)[name = string("x_261_cast_fp16")];
+            tensor<int32, [1]> var_3241_axes_0 = const()[name = string("op_3241_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794131264)))];
+            tensor<fp16, [1280]> blocks_14_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794133888)))];
+            tensor<fp16, [1, ?, 1280]> var_3241_cast_fp16 = layer_norm(axes = var_3241_axes_0, beta = blocks_14_cross_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_cross_attn_ln_weight_to_fp16, x = x_261_cast_fp16)[name = string("op_3241_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3250_to_fp16 = const()[name = string("op_3250_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794136512)))];
+            tensor<fp16, [1280]> var_3251_to_fp16 = const()[name = string("op_3251_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797413376)))];
+            tensor<fp16, [1, ?, 1280]> linear_116_cast_fp16 = linear(bias = var_3251_to_fp16, weight = var_3250_to_fp16, x = var_3241_cast_fp16)[name = string("linear_116_cast_fp16")];
+            tensor<int32, [3]> concat_326 = const()[name = string("concat_326"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_327 = const()[name = string("concat_327"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_147_internal_tensor_assign_1_stride_0 = const()[name = string("k_147_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_326, begin_mask = k_147_internal_tensor_assign_1_begin_mask_0, end = concat_327, end_mask = k_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_147_internal_tensor_assign_1_squeeze_mask_0, stride = k_147_internal_tensor_assign_1_stride_0, update = k_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("k_147_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_328 = const()[name = string("concat_328"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_329 = const()[name = string("concat_329"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_147_internal_tensor_assign_1_stride_0 = const()[name = string("v_147_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_328, begin_mask = v_147_internal_tensor_assign_1_begin_mask_0, end = concat_329, end_mask = v_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_147_internal_tensor_assign_1_squeeze_mask_0, stride = v_147_internal_tensor_assign_1_stride_0, update = v_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("v_147_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_330x = const()[name = string("concat_330x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3271_cast_fp16 = reshape(shape = concat_330x, x = linear_116_cast_fp16)[name = string("op_3271_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_218_to_fp16 = const()[name = string("const_218_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_119_cast_fp16 = mul(x = var_3271_cast_fp16, y = const_218_to_fp16)[name = string("q_119_cast_fp16")];
+            tensor<int32, [4]> var_3277 = const()[name = string("op_3277"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3278_cast_fp16 = reshape(shape = var_3277, x = k_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3278_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_219_to_fp16 = const()[name = string("const_219_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_149_cast_fp16 = mul(x = var_3278_cast_fp16, y = const_219_to_fp16)[name = string("k_149_cast_fp16")];
+            tensor<int32, [4]> var_3284 = const()[name = string("op_3284"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3285_cast_fp16 = reshape(shape = var_3284, x = v_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3285_cast_fp16")];
+            tensor<int32, [4]> var_3286 = const()[name = string("op_3286"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_89_transpose_x_0 = const()[name = string("qk_89_transpose_x_0"), val = bool(false)];
+            bool qk_89_transpose_y_0 = const()[name = string("qk_89_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_315_perm_0 = const()[name = string("transpose_315_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_316_perm_0 = const()[name = string("transpose_316_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_316 = transpose(perm = transpose_316_perm_0, x = k_149_cast_fp16)[name = string("transpose_522")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_315 = transpose(perm = transpose_315_perm_0, x = q_119_cast_fp16)[name = string("transpose_523")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_89_cast_fp16 = matmul(transpose_x = qk_89_transpose_x_0, transpose_y = qk_89_transpose_y_0, x = transpose_315, y = transpose_316)[name = string("qk_89_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3290_cast_fp16 = softmax(axis = var_3134, x = qk_89_cast_fp16)[name = string("op_3290_cast_fp16")];
+            bool var_3292_transpose_x_0 = const()[name = string("op_3292_transpose_x_0"), val = bool(false)];
+            bool var_3292_transpose_y_0 = const()[name = string("op_3292_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_149_cast_fp16 = transpose(perm = var_3286, x = var_3285_cast_fp16)[name = string("transpose_524")];
+            tensor<fp16, [1, 20, ?, 64]> var_3292_cast_fp16 = matmul(transpose_x = var_3292_transpose_x_0, transpose_y = var_3292_transpose_y_0, x = var_3290_cast_fp16, y = v_149_cast_fp16)[name = string("op_3292_cast_fp16")];
+            tensor<int32, [4]> var_3293 = const()[name = string("op_3293"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_331x = const()[name = string("concat_331x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3294_cast_fp16 = transpose(perm = var_3293, x = var_3292_cast_fp16)[name = string("transpose_521")];
+            tensor<fp16, [1, ?, 1280]> x_265_cast_fp16 = reshape(shape = concat_331x, x = var_3294_cast_fp16)[name = string("x_265_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3298_to_fp16 = const()[name = string("op_3298_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797416000)))];
+            tensor<fp16, [1280]> var_3299_to_fp16 = const()[name = string("op_3299_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800692864)))];
+            tensor<fp16, [1, ?, 1280]> linear_117_cast_fp16 = linear(bias = var_3299_to_fp16, weight = var_3298_to_fp16, x = x_265_cast_fp16)[name = string("linear_117_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_267_cast_fp16 = add(x = x_261_cast_fp16, y = linear_117_cast_fp16)[name = string("x_267_cast_fp16")];
+            tensor<int32, [1]> var_3306_axes_0 = const()[name = string("op_3306_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800695488)))];
+            tensor<fp16, [1280]> blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800698112)))];
+            tensor<fp16, [1, ?, 1280]> var_3306_cast_fp16 = layer_norm(axes = var_3306_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_267_cast_fp16)[name = string("op_3306_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3315_to_fp16 = const()[name = string("op_3315_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800700736)))];
+            tensor<fp16, [5120]> var_3316_to_fp16 = const()[name = string("op_3316_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813808000)))];
+            tensor<fp16, [1, ?, 5120]> linear_118_cast_fp16 = linear(bias = var_3316_to_fp16, weight = var_3315_to_fp16, x = var_3306_cast_fp16)[name = string("linear_118_cast_fp16")];
+            string x_271_mode_0 = const()[name = string("x_271_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_271_cast_fp16 = gelu(mode = x_271_mode_0, x = linear_118_cast_fp16)[name = string("x_271_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3321_to_fp16 = const()[name = string("op_3321_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813818304)))];
+            tensor<fp16, [1280]> var_3322_to_fp16 = const()[name = string("op_3322_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826925568)))];
+            tensor<fp16, [1, ?, 1280]> linear_119_cast_fp16 = linear(bias = var_3322_to_fp16, weight = var_3321_to_fp16, x = x_271_cast_fp16)[name = string("linear_119_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_273_cast_fp16 = add(x = x_267_cast_fp16, y = linear_119_cast_fp16)[name = string("x_273_cast_fp16")];
+            tensor<int32, [4]> k_cache_61_begin_0 = const()[name = string("k_cache_61_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_61_end_0 = const()[name = string("k_cache_61_end_0"), val = tensor<int32, [4]>([16, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_61_end_mask_0 = const()[name = string("k_cache_61_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_61_squeeze_mask_0 = const()[name = string("k_cache_61_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_61_cast_fp16 = slice_by_index(begin = k_cache_61_begin_0, end = k_cache_61_end_0, end_mask = k_cache_61_end_mask_0, squeeze_mask = k_cache_61_squeeze_mask_0, x = coreml_update_state_92)[name = string("k_cache_61_cast_fp16")];
+            tensor<int32, [4]> v_cache_61_begin_0 = const()[name = string("v_cache_61_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_61_end_0 = const()[name = string("v_cache_61_end_0"), val = tensor<int32, [4]>([16, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_61_end_mask_0 = const()[name = string("v_cache_61_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_61_squeeze_mask_0 = const()[name = string("v_cache_61_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_61_cast_fp16 = slice_by_index(begin = v_cache_61_begin_0, end = v_cache_61_end_0, end_mask = v_cache_61_end_mask_0, squeeze_mask = v_cache_61_squeeze_mask_0, x = coreml_update_state_93)[name = string("v_cache_61_cast_fp16")];
+            tensor<int32, [4]> k_cache_63_begin_0 = const()[name = string("k_cache_63_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_63_end_0 = const()[name = string("k_cache_63_end_0"), val = tensor<int32, [4]>([16, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_63_end_mask_0 = const()[name = string("k_cache_63_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_63_squeeze_mask_0 = const()[name = string("k_cache_63_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_63_cast_fp16 = slice_by_index(begin = k_cache_63_begin_0, end = k_cache_63_end_0, end_mask = k_cache_63_end_mask_0, squeeze_mask = k_cache_63_squeeze_mask_0, x = read_state_2)[name = string("k_cache_63_cast_fp16")];
+            tensor<int32, [4]> v_cache_63_begin_0 = const()[name = string("v_cache_63_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_63_end_0 = const()[name = string("v_cache_63_end_0"), val = tensor<int32, [4]>([16, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_63_end_mask_0 = const()[name = string("v_cache_63_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_63_squeeze_mask_0 = const()[name = string("v_cache_63_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_63_cast_fp16 = slice_by_index(begin = v_cache_63_begin_0, end = v_cache_63_end_0, end_mask = v_cache_63_end_mask_0, squeeze_mask = v_cache_63_squeeze_mask_0, x = read_state_3)[name = string("v_cache_63_cast_fp16")];
+            int32 var_3345 = const()[name = string("op_3345"), val = int32(-1)];
+            tensor<int32, [1]> var_3363_axes_0 = const()[name = string("op_3363_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826928192)))];
+            tensor<fp16, [1280]> blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826930816)))];
+            fp16 var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3363_cast_fp16 = layer_norm(axes = var_3363_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_273_cast_fp16)[name = string("op_3363_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3374_to_fp16 = const()[name = string("op_3374_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826933440)))];
+            tensor<fp16, [1280]> var_3375_to_fp16 = const()[name = string("op_3375_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830210304)))];
+            tensor<fp16, [1, ?, 1280]> linear_120_cast_fp16 = linear(bias = var_3375_to_fp16, weight = var_3374_to_fp16, x = var_3363_cast_fp16)[name = string("linear_120_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3378_to_fp16 = const()[name = string("op_3378_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830212928)))];
+            tensor<fp16, [1, ?, 1280]> linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3378_to_fp16, x = var_3363_cast_fp16)[name = string("linear_121_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3382_to_fp16 = const()[name = string("op_3382_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833489792)))];
+            tensor<fp16, [1280]> var_3383_to_fp16 = const()[name = string("op_3383_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836766656)))];
+            tensor<fp16, [1, ?, 1280]> linear_122_cast_fp16 = linear(bias = var_3383_to_fp16, weight = var_3382_to_fp16, x = var_3363_cast_fp16)[name = string("linear_122_cast_fp16")];
+            tensor<int32, [3]> var_3385_shape_cast_fp16 = shape(x = linear_120_cast_fp16)[name = string("op_3385_shape_cast_fp16")];
+            int32 gather_182_axis_0 = const()[name = string("gather_182_axis_0"), val = int32(0)];
+            int32 gather_182_batch_dims_0 = const()[name = string("gather_182_batch_dims_0"), val = int32(0)];
+            bool gather_182_validate_indices_0 = const()[name = string("gather_182_validate_indices_0"), val = bool(false)];
+            string var_3385_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3385_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_182_to_uint16 = const()[name = string("select_182_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3385_shape_cast_fp16_to_uint16 = cast(dtype = var_3385_shape_cast_fp16_to_uint16_dtype_0, x = var_3385_shape_cast_fp16)[name = string("cast_360")];
+            uint16 gather_182_cast_uint16 = gather(axis = gather_182_axis_0, batch_dims = gather_182_batch_dims_0, indices = select_182_to_uint16, validate_indices = gather_182_validate_indices_0, x = var_3385_shape_cast_fp16_to_uint16)[name = string("gather_182_cast_uint16")];
+            string gather_182_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_182_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_182_cast_uint16_to_int32 = cast(dtype = gather_182_cast_uint16_to_int32_dtype_0, x = gather_182_cast_uint16)[name = string("cast_359")];
+            int32 end_step_33 = add(x = offset, y = gather_182_cast_uint16_to_int32)[name = string("end_step_33")];
+            tensor<int32, [1]> expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = end_step_33)[name = string("expand_dims_243")];
+            tensor<int32, [1]> concat_334_values0_0 = const()[name = string("concat_334_values0_0"), val = tensor<int32, [1]>([15])];
+            int32 concat_334_axis_0 = const()[name = string("concat_334_axis_0"), val = int32(0)];
+            bool concat_334_interleave_0 = const()[name = string("concat_334_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_334 = concat(axis = concat_334_axis_0, interleave = concat_334_interleave_0, values = (concat_334_values0_0, expand_dims_240, expand_dims_1, expand_dims_242))[name = string("concat_334")];
+            tensor<int32, [1]> concat_335_values0_0 = const()[name = string("concat_335_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_335_values1_0 = const()[name = string("concat_335_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_335_values3_0 = const()[name = string("concat_335_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_335_axis_0 = const()[name = string("concat_335_axis_0"), val = int32(0)];
+            bool concat_335_interleave_0 = const()[name = string("concat_335_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_335 = concat(axis = concat_335_axis_0, interleave = concat_335_interleave_0, values = (concat_335_values0_0, concat_335_values1_0, expand_dims_243, concat_335_values3_0))[name = string("concat_335")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = k_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = k_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_16_stride_0, update = linear_121_cast_fp16, x = coreml_update_state_92)[name = string("k_cache1_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_16_cast_fp16, input = k_cache1)[name = string("coreml_update_state_94_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_94 = read_state(input = k_cache1)[name = string("coreml_update_state_94")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = v_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = v_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_16_stride_0, update = linear_122_cast_fp16, x = coreml_update_state_93)[name = string("v_cache1_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_16_cast_fp16, input = v_cache1)[name = string("coreml_update_state_95_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_95 = read_state(input = v_cache1)[name = string("coreml_update_state_95")];
+            int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)];
+            int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(1280)];
+            int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)];
+            bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, end_step_33, concat_340_values2_0))[name = string("concat_340")];
+            tensor<int32, [3]> var_3401_begin_0 = const()[name = string("op_3401_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3401_end_mask_0 = const()[name = string("op_3401_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3401_cast_fp16 = slice_by_index(begin = var_3401_begin_0, end = concat_340, end_mask = var_3401_end_mask_0, x = k_cache_61_cast_fp16)[name = string("op_3401_cast_fp16")];
+            tensor<int32, [3]> var_3404_begin_0 = const()[name = string("op_3404_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3404_end_mask_0 = const()[name = string("op_3404_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3404_cast_fp16 = slice_by_index(begin = var_3404_begin_0, end = concat_340, end_mask = var_3404_end_mask_0, x = v_cache_61_cast_fp16)[name = string("op_3404_cast_fp16")];
+            tensor<int32, [4]> concat_342x = const()[name = string("concat_342x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3414_cast_fp16 = reshape(shape = concat_342x, x = linear_120_cast_fp16)[name = string("op_3414_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_220_to_fp16 = const()[name = string("const_220_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_123_cast_fp16 = mul(x = var_3414_cast_fp16, y = const_220_to_fp16)[name = string("q_123_cast_fp16")];
+            tensor<int32, [4]> concat_343x = const()[name = string("concat_343x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3421_cast_fp16 = reshape(shape = concat_343x, x = var_3401_cast_fp16)[name = string("op_3421_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_221_to_fp16 = const()[name = string("const_221_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_155_cast_fp16 = mul(x = var_3421_cast_fp16, y = const_221_to_fp16)[name = string("k_155_cast_fp16")];
+            tensor<int32, [4]> concat_344x = const()[name = string("concat_344x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3428_cast_fp16 = reshape(shape = concat_344x, x = var_3404_cast_fp16)[name = string("op_3428_cast_fp16")];
+            tensor<int32, [4]> var_3429 = const()[name = string("op_3429"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_91_transpose_x_0 = const()[name = string("qk_91_transpose_x_0"), val = bool(false)];
+            bool qk_91_transpose_y_0 = const()[name = string("qk_91_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_317_perm_0 = const()[name = string("transpose_317_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_318_perm_0 = const()[name = string("transpose_318_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_318 = transpose(perm = transpose_318_perm_0, x = k_155_cast_fp16)[name = string("transpose_518")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_317 = transpose(perm = transpose_317_perm_0, x = q_123_cast_fp16)[name = string("transpose_519")];
+            tensor<fp16, [1, 20, ?, ?]> qk_91_cast_fp16 = matmul(transpose_x = qk_91_transpose_x_0, transpose_y = qk_91_transpose_y_0, x = transpose_317, y = transpose_318)[name = string("qk_91_cast_fp16")];
+            int32 concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = int32(448)];
+            int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)];
+            bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (gather_182_cast_uint16_to_int32, concat_345_values1_0))[name = string("concat_345")];
+            tensor<int32, [2]> var_3432_begin_0 = const()[name = string("op_3432_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3432_end_mask_0 = const()[name = string("op_3432_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3432_cast_fp16 = slice_by_index(begin = var_3432_begin_0, end = concat_345, end_mask = var_3432_end_mask_0, x = mask_to_fp16)[name = string("op_3432_cast_fp16")];
+            int32 concat_346_values0_0 = const()[name = string("concat_346_values0_0"), val = int32(0)];
+            int32 concat_346_axis_0 = const()[name = string("concat_346_axis_0"), val = int32(0)];
+            bool concat_346_interleave_0 = const()[name = string("concat_346_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_346 = concat(axis = concat_346_axis_0, interleave = concat_346_interleave_0, values = (concat_346_values0_0, gather_182_cast_uint16_to_int32))[name = string("concat_346")];
+            tensor<int32, [2]> var_3433_begin_0 = const()[name = string("op_3433_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3433_end_mask_0 = const()[name = string("op_3433_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3433_cast_fp16 = slice_by_index(begin = var_3433_begin_0, end = concat_346, end_mask = var_3433_end_mask_0, x = var_3432_cast_fp16)[name = string("op_3433_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_93_cast_fp16 = add(x = qk_91_cast_fp16, y = var_3433_cast_fp16)[name = string("qk_93_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3436_cast_fp16 = softmax(axis = var_3345, x = qk_93_cast_fp16)[name = string("op_3436_cast_fp16")];
+            bool var_3438_transpose_x_0 = const()[name = string("op_3438_transpose_x_0"), val = bool(false)];
+            bool var_3438_transpose_y_0 = const()[name = string("op_3438_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_155_cast_fp16 = transpose(perm = var_3429, x = var_3428_cast_fp16)[name = string("transpose_520")];
+            tensor<fp16, [1, 20, ?, 64]> var_3438_cast_fp16 = matmul(transpose_x = var_3438_transpose_x_0, transpose_y = var_3438_transpose_y_0, x = var_3436_cast_fp16, y = v_155_cast_fp16)[name = string("op_3438_cast_fp16")];
+            tensor<int32, [4]> var_3439 = const()[name = string("op_3439"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_347x = const()[name = string("concat_347x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3440_cast_fp16 = transpose(perm = var_3439, x = var_3438_cast_fp16)[name = string("transpose_517")];
+            tensor<fp16, [1, ?, 1280]> x_277_cast_fp16 = reshape(shape = concat_347x, x = var_3440_cast_fp16)[name = string("x_277_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3444_to_fp16 = const()[name = string("op_3444_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836769280)))];
+            tensor<fp16, [1280]> var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840046144)))];
+            tensor<fp16, [1, ?, 1280]> linear_123_cast_fp16 = linear(bias = var_3445_to_fp16, weight = var_3444_to_fp16, x = x_277_cast_fp16)[name = string("linear_123_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_279_cast_fp16 = add(x = x_273_cast_fp16, y = linear_123_cast_fp16)[name = string("x_279_cast_fp16")];
+            tensor<int32, [1]> var_3452_axes_0 = const()[name = string("op_3452_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840048768)))];
+            tensor<fp16, [1280]> blocks_15_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840051392)))];
+            tensor<fp16, [1, ?, 1280]> var_3452_cast_fp16 = layer_norm(axes = var_3452_axes_0, beta = blocks_15_cross_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_cross_attn_ln_weight_to_fp16, x = x_279_cast_fp16)[name = string("op_3452_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3461_to_fp16 = const()[name = string("op_3461_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840054016)))];
+            tensor<fp16, [1280]> var_3462_to_fp16 = const()[name = string("op_3462_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843330880)))];
+            tensor<fp16, [1, ?, 1280]> linear_124_cast_fp16 = linear(bias = var_3462_to_fp16, weight = var_3461_to_fp16, x = var_3452_cast_fp16)[name = string("linear_124_cast_fp16")];
+            tensor<int32, [3]> concat_348 = const()[name = string("concat_348"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_349 = const()[name = string("concat_349"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_157_internal_tensor_assign_1_stride_0 = const()[name = string("k_157_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_348, begin_mask = k_157_internal_tensor_assign_1_begin_mask_0, end = concat_349, end_mask = k_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_157_internal_tensor_assign_1_squeeze_mask_0, stride = k_157_internal_tensor_assign_1_stride_0, update = k_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("k_157_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_350 = const()[name = string("concat_350"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_351 = const()[name = string("concat_351"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_157_internal_tensor_assign_1_stride_0 = const()[name = string("v_157_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_350, begin_mask = v_157_internal_tensor_assign_1_begin_mask_0, end = concat_351, end_mask = v_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_157_internal_tensor_assign_1_squeeze_mask_0, stride = v_157_internal_tensor_assign_1_stride_0, update = v_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("v_157_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_352x = const()[name = string("concat_352x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3482_cast_fp16 = reshape(shape = concat_352x, x = linear_124_cast_fp16)[name = string("op_3482_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_222_to_fp16 = const()[name = string("const_222_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_127_cast_fp16 = mul(x = var_3482_cast_fp16, y = const_222_to_fp16)[name = string("q_127_cast_fp16")];
+            tensor<int32, [4]> var_3488 = const()[name = string("op_3488"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3489_cast_fp16 = reshape(shape = var_3488, x = k_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3489_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_223_to_fp16 = const()[name = string("const_223_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_159_cast_fp16 = mul(x = var_3489_cast_fp16, y = const_223_to_fp16)[name = string("k_159_cast_fp16")];
+            tensor<int32, [4]> var_3495 = const()[name = string("op_3495"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3496_cast_fp16 = reshape(shape = var_3495, x = v_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3496_cast_fp16")];
+            tensor<int32, [4]> var_3497 = const()[name = string("op_3497"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_95_transpose_x_0 = const()[name = string("qk_95_transpose_x_0"), val = bool(false)];
+            bool qk_95_transpose_y_0 = const()[name = string("qk_95_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_319_perm_0 = const()[name = string("transpose_319_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_320_perm_0 = const()[name = string("transpose_320_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_320 = transpose(perm = transpose_320_perm_0, x = k_159_cast_fp16)[name = string("transpose_514")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_319 = transpose(perm = transpose_319_perm_0, x = q_127_cast_fp16)[name = string("transpose_515")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_95_cast_fp16 = matmul(transpose_x = qk_95_transpose_x_0, transpose_y = qk_95_transpose_y_0, x = transpose_319, y = transpose_320)[name = string("qk_95_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3501_cast_fp16 = softmax(axis = var_3345, x = qk_95_cast_fp16)[name = string("op_3501_cast_fp16")];
+            bool var_3503_transpose_x_0 = const()[name = string("op_3503_transpose_x_0"), val = bool(false)];
+            bool var_3503_transpose_y_0 = const()[name = string("op_3503_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_159_cast_fp16 = transpose(perm = var_3497, x = var_3496_cast_fp16)[name = string("transpose_516")];
+            tensor<fp16, [1, 20, ?, 64]> var_3503_cast_fp16 = matmul(transpose_x = var_3503_transpose_x_0, transpose_y = var_3503_transpose_y_0, x = var_3501_cast_fp16, y = v_159_cast_fp16)[name = string("op_3503_cast_fp16")];
+            tensor<int32, [4]> var_3504 = const()[name = string("op_3504"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_353x = const()[name = string("concat_353x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3505_cast_fp16 = transpose(perm = var_3504, x = var_3503_cast_fp16)[name = string("transpose_513")];
+            tensor<fp16, [1, ?, 1280]> x_283_cast_fp16 = reshape(shape = concat_353x, x = var_3505_cast_fp16)[name = string("x_283_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3509_to_fp16 = const()[name = string("op_3509_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843333504)))];
+            tensor<fp16, [1280]> var_3510_to_fp16 = const()[name = string("op_3510_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846610368)))];
+            tensor<fp16, [1, ?, 1280]> linear_125_cast_fp16 = linear(bias = var_3510_to_fp16, weight = var_3509_to_fp16, x = x_283_cast_fp16)[name = string("linear_125_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_125_cast_fp16)[name = string("x_285_cast_fp16")];
+            tensor<int32, [1]> var_3517_axes_0 = const()[name = string("op_3517_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846612992)))];
+            tensor<fp16, [1280]> blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846615616)))];
+            tensor<fp16, [1, ?, 1280]> var_3517_cast_fp16 = layer_norm(axes = var_3517_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_285_cast_fp16)[name = string("op_3517_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3526_to_fp16 = const()[name = string("op_3526_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846618240)))];
+            tensor<fp16, [5120]> var_3527_to_fp16 = const()[name = string("op_3527_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859725504)))];
+            tensor<fp16, [1, ?, 5120]> linear_126_cast_fp16 = linear(bias = var_3527_to_fp16, weight = var_3526_to_fp16, x = var_3517_cast_fp16)[name = string("linear_126_cast_fp16")];
+            string x_289_mode_0 = const()[name = string("x_289_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_289_cast_fp16 = gelu(mode = x_289_mode_0, x = linear_126_cast_fp16)[name = string("x_289_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3532_to_fp16 = const()[name = string("op_3532_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859735808)))];
+            tensor<fp16, [1280]> var_3533_to_fp16 = const()[name = string("op_3533_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872843072)))];
+            tensor<fp16, [1, ?, 1280]> linear_127_cast_fp16 = linear(bias = var_3533_to_fp16, weight = var_3532_to_fp16, x = x_289_cast_fp16)[name = string("linear_127_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_291_cast_fp16 = add(x = x_285_cast_fp16, y = linear_127_cast_fp16)[name = string("x_291_cast_fp16")];
+            tensor<int32, [4]> k_cache_65_begin_0 = const()[name = string("k_cache_65_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_65_end_0 = const()[name = string("k_cache_65_end_0"), val = tensor<int32, [4]>([17, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_65_end_mask_0 = const()[name = string("k_cache_65_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_65_squeeze_mask_0 = const()[name = string("k_cache_65_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_65_cast_fp16 = slice_by_index(begin = k_cache_65_begin_0, end = k_cache_65_end_0, end_mask = k_cache_65_end_mask_0, squeeze_mask = k_cache_65_squeeze_mask_0, x = coreml_update_state_94)[name = string("k_cache_65_cast_fp16")];
+            tensor<int32, [4]> v_cache_65_begin_0 = const()[name = string("v_cache_65_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_65_end_0 = const()[name = string("v_cache_65_end_0"), val = tensor<int32, [4]>([17, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_65_end_mask_0 = const()[name = string("v_cache_65_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_65_squeeze_mask_0 = const()[name = string("v_cache_65_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_65_cast_fp16 = slice_by_index(begin = v_cache_65_begin_0, end = v_cache_65_end_0, end_mask = v_cache_65_end_mask_0, squeeze_mask = v_cache_65_squeeze_mask_0, x = coreml_update_state_95)[name = string("v_cache_65_cast_fp16")];
+            tensor<int32, [4]> k_cache_67_begin_0 = const()[name = string("k_cache_67_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_67_end_0 = const()[name = string("k_cache_67_end_0"), val = tensor<int32, [4]>([17, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_67_end_mask_0 = const()[name = string("k_cache_67_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_67_squeeze_mask_0 = const()[name = string("k_cache_67_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_67_cast_fp16 = slice_by_index(begin = k_cache_67_begin_0, end = k_cache_67_end_0, end_mask = k_cache_67_end_mask_0, squeeze_mask = k_cache_67_squeeze_mask_0, x = read_state_2)[name = string("k_cache_67_cast_fp16")];
+            tensor<int32, [4]> v_cache_67_begin_0 = const()[name = string("v_cache_67_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_67_end_0 = const()[name = string("v_cache_67_end_0"), val = tensor<int32, [4]>([17, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_67_end_mask_0 = const()[name = string("v_cache_67_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_67_squeeze_mask_0 = const()[name = string("v_cache_67_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_67_cast_fp16 = slice_by_index(begin = v_cache_67_begin_0, end = v_cache_67_end_0, end_mask = v_cache_67_end_mask_0, squeeze_mask = v_cache_67_squeeze_mask_0, x = read_state_3)[name = string("v_cache_67_cast_fp16")];
+            int32 var_3556 = const()[name = string("op_3556"), val = int32(-1)];
+            tensor<int32, [1]> var_3574_axes_0 = const()[name = string("op_3574_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872845696)))];
+            tensor<fp16, [1280]> blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872848320)))];
+            fp16 var_3562_to_fp16 = const()[name = string("op_3562_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3574_cast_fp16 = layer_norm(axes = var_3574_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_291_cast_fp16)[name = string("op_3574_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3585_to_fp16 = const()[name = string("op_3585_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872850944)))];
+            tensor<fp16, [1280]> var_3586_to_fp16 = const()[name = string("op_3586_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876127808)))];
+            tensor<fp16, [1, ?, 1280]> linear_128_cast_fp16 = linear(bias = var_3586_to_fp16, weight = var_3585_to_fp16, x = var_3574_cast_fp16)[name = string("linear_128_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3589_to_fp16 = const()[name = string("op_3589_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876130432)))];
+            tensor<fp16, [1, ?, 1280]> linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3589_to_fp16, x = var_3574_cast_fp16)[name = string("linear_129_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3593_to_fp16 = const()[name = string("op_3593_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(879407296)))];
+            tensor<fp16, [1280]> var_3594_to_fp16 = const()[name = string("op_3594_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882684160)))];
+            tensor<fp16, [1, ?, 1280]> linear_130_cast_fp16 = linear(bias = var_3594_to_fp16, weight = var_3593_to_fp16, x = var_3574_cast_fp16)[name = string("linear_130_cast_fp16")];
+            tensor<int32, [3]> var_3596_shape_cast_fp16 = shape(x = linear_128_cast_fp16)[name = string("op_3596_shape_cast_fp16")];
+            int32 gather_194_axis_0 = const()[name = string("gather_194_axis_0"), val = int32(0)];
+            int32 gather_194_batch_dims_0 = const()[name = string("gather_194_batch_dims_0"), val = int32(0)];
+            bool gather_194_validate_indices_0 = const()[name = string("gather_194_validate_indices_0"), val = bool(false)];
+            string var_3596_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3596_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_194_to_uint16 = const()[name = string("select_194_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3596_shape_cast_fp16_to_uint16 = cast(dtype = var_3596_shape_cast_fp16_to_uint16_dtype_0, x = var_3596_shape_cast_fp16)[name = string("cast_358")];
+            uint16 gather_194_cast_uint16 = gather(axis = gather_194_axis_0, batch_dims = gather_194_batch_dims_0, indices = select_194_to_uint16, validate_indices = gather_194_validate_indices_0, x = var_3596_shape_cast_fp16_to_uint16)[name = string("gather_194_cast_uint16")];
+            string gather_194_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_194_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_194_cast_uint16_to_int32 = cast(dtype = gather_194_cast_uint16_to_int32_dtype_0, x = gather_194_cast_uint16)[name = string("cast_357")];
+            int32 end_step_35 = add(x = offset, y = gather_194_cast_uint16_to_int32)[name = string("end_step_35")];
+            tensor<int32, [1]> expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = end_step_35)[name = string("expand_dims_259")];
+            tensor<int32, [1]> concat_356_values0_0 = const()[name = string("concat_356_values0_0"), val = tensor<int32, [1]>([16])];
+            int32 concat_356_axis_0 = const()[name = string("concat_356_axis_0"), val = int32(0)];
+            bool concat_356_interleave_0 = const()[name = string("concat_356_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_356 = concat(axis = concat_356_axis_0, interleave = concat_356_interleave_0, values = (concat_356_values0_0, expand_dims_256, expand_dims_1, expand_dims_258))[name = string("concat_356")];
+            tensor<int32, [1]> concat_357_values0_0 = const()[name = string("concat_357_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_357_values1_0 = const()[name = string("concat_357_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_357_values3_0 = const()[name = string("concat_357_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)];
+            bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (concat_357_values0_0, concat_357_values1_0, expand_dims_259, concat_357_values3_0))[name = string("concat_357")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = k_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = k_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_17_stride_0, update = linear_129_cast_fp16, x = coreml_update_state_94)[name = string("k_cache1_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_17_cast_fp16, input = k_cache1)[name = string("coreml_update_state_96_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_96 = read_state(input = k_cache1)[name = string("coreml_update_state_96")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = v_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = v_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_17_stride_0, update = linear_130_cast_fp16, x = coreml_update_state_95)[name = string("v_cache1_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_17_cast_fp16, input = v_cache1)[name = string("coreml_update_state_97_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_97 = read_state(input = v_cache1)[name = string("coreml_update_state_97")];
+            int32 concat_362_values0_0 = const()[name = string("concat_362_values0_0"), val = int32(1)];
+            int32 concat_362_values2_0 = const()[name = string("concat_362_values2_0"), val = int32(1280)];
+            int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)];
+            bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (concat_362_values0_0, end_step_35, concat_362_values2_0))[name = string("concat_362")];
+            tensor<int32, [3]> var_3612_begin_0 = const()[name = string("op_3612_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3612_end_mask_0 = const()[name = string("op_3612_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3612_cast_fp16 = slice_by_index(begin = var_3612_begin_0, end = concat_362, end_mask = var_3612_end_mask_0, x = k_cache_65_cast_fp16)[name = string("op_3612_cast_fp16")];
+            tensor<int32, [3]> var_3615_begin_0 = const()[name = string("op_3615_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3615_end_mask_0 = const()[name = string("op_3615_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3615_cast_fp16 = slice_by_index(begin = var_3615_begin_0, end = concat_362, end_mask = var_3615_end_mask_0, x = v_cache_65_cast_fp16)[name = string("op_3615_cast_fp16")];
+            tensor<int32, [4]> concat_364x = const()[name = string("concat_364x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3625_cast_fp16 = reshape(shape = concat_364x, x = linear_128_cast_fp16)[name = string("op_3625_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_131_cast_fp16 = mul(x = var_3625_cast_fp16, y = const_224_to_fp16)[name = string("q_131_cast_fp16")];
+            tensor<int32, [4]> concat_365x = const()[name = string("concat_365x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3632_cast_fp16 = reshape(shape = concat_365x, x = var_3612_cast_fp16)[name = string("op_3632_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_165_cast_fp16 = mul(x = var_3632_cast_fp16, y = const_225_to_fp16)[name = string("k_165_cast_fp16")];
+            tensor<int32, [4]> concat_366x = const()[name = string("concat_366x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3639_cast_fp16 = reshape(shape = concat_366x, x = var_3615_cast_fp16)[name = string("op_3639_cast_fp16")];
+            tensor<int32, [4]> var_3640 = const()[name = string("op_3640"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_97_transpose_x_0 = const()[name = string("qk_97_transpose_x_0"), val = bool(false)];
+            bool qk_97_transpose_y_0 = const()[name = string("qk_97_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_321_perm_0 = const()[name = string("transpose_321_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_322_perm_0 = const()[name = string("transpose_322_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_322 = transpose(perm = transpose_322_perm_0, x = k_165_cast_fp16)[name = string("transpose_510")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_321 = transpose(perm = transpose_321_perm_0, x = q_131_cast_fp16)[name = string("transpose_511")];
+            tensor<fp16, [1, 20, ?, ?]> qk_97_cast_fp16 = matmul(transpose_x = qk_97_transpose_x_0, transpose_y = qk_97_transpose_y_0, x = transpose_321, y = transpose_322)[name = string("qk_97_cast_fp16")];
+            int32 concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = int32(448)];
+            int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)];
+            bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (gather_194_cast_uint16_to_int32, concat_367_values1_0))[name = string("concat_367")];
+            tensor<int32, [2]> var_3643_begin_0 = const()[name = string("op_3643_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3643_end_mask_0 = const()[name = string("op_3643_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3643_cast_fp16 = slice_by_index(begin = var_3643_begin_0, end = concat_367, end_mask = var_3643_end_mask_0, x = mask_to_fp16)[name = string("op_3643_cast_fp16")];
+            int32 concat_368_values0_0 = const()[name = string("concat_368_values0_0"), val = int32(0)];
+            int32 concat_368_axis_0 = const()[name = string("concat_368_axis_0"), val = int32(0)];
+            bool concat_368_interleave_0 = const()[name = string("concat_368_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_368 = concat(axis = concat_368_axis_0, interleave = concat_368_interleave_0, values = (concat_368_values0_0, gather_194_cast_uint16_to_int32))[name = string("concat_368")];
+            tensor<int32, [2]> var_3644_begin_0 = const()[name = string("op_3644_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3644_end_mask_0 = const()[name = string("op_3644_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3644_cast_fp16 = slice_by_index(begin = var_3644_begin_0, end = concat_368, end_mask = var_3644_end_mask_0, x = var_3643_cast_fp16)[name = string("op_3644_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_99_cast_fp16 = add(x = qk_97_cast_fp16, y = var_3644_cast_fp16)[name = string("qk_99_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3647_cast_fp16 = softmax(axis = var_3556, x = qk_99_cast_fp16)[name = string("op_3647_cast_fp16")];
+            bool var_3649_transpose_x_0 = const()[name = string("op_3649_transpose_x_0"), val = bool(false)];
+            bool var_3649_transpose_y_0 = const()[name = string("op_3649_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_165_cast_fp16 = transpose(perm = var_3640, x = var_3639_cast_fp16)[name = string("transpose_512")];
+            tensor<fp16, [1, 20, ?, 64]> var_3649_cast_fp16 = matmul(transpose_x = var_3649_transpose_x_0, transpose_y = var_3649_transpose_y_0, x = var_3647_cast_fp16, y = v_165_cast_fp16)[name = string("op_3649_cast_fp16")];
+            tensor<int32, [4]> var_3650 = const()[name = string("op_3650"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_369x = const()[name = string("concat_369x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3651_cast_fp16 = transpose(perm = var_3650, x = var_3649_cast_fp16)[name = string("transpose_509")];
+            tensor<fp16, [1, ?, 1280]> x_295_cast_fp16 = reshape(shape = concat_369x, x = var_3651_cast_fp16)[name = string("x_295_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3655_to_fp16 = const()[name = string("op_3655_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882686784)))];
+            tensor<fp16, [1280]> var_3656_to_fp16 = const()[name = string("op_3656_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885963648)))];
+            tensor<fp16, [1, ?, 1280]> linear_131_cast_fp16 = linear(bias = var_3656_to_fp16, weight = var_3655_to_fp16, x = x_295_cast_fp16)[name = string("linear_131_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_297_cast_fp16 = add(x = x_291_cast_fp16, y = linear_131_cast_fp16)[name = string("x_297_cast_fp16")];
+            tensor<int32, [1]> var_3663_axes_0 = const()[name = string("op_3663_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885966272)))];
+            tensor<fp16, [1280]> blocks_16_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885968896)))];
+            tensor<fp16, [1, ?, 1280]> var_3663_cast_fp16 = layer_norm(axes = var_3663_axes_0, beta = blocks_16_cross_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_cross_attn_ln_weight_to_fp16, x = x_297_cast_fp16)[name = string("op_3663_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3672_to_fp16 = const()[name = string("op_3672_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885971520)))];
+            tensor<fp16, [1280]> var_3673_to_fp16 = const()[name = string("op_3673_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889248384)))];
+            tensor<fp16, [1, ?, 1280]> linear_132_cast_fp16 = linear(bias = var_3673_to_fp16, weight = var_3672_to_fp16, x = var_3663_cast_fp16)[name = string("linear_132_cast_fp16")];
+            tensor<int32, [3]> concat_370 = const()[name = string("concat_370"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_371 = const()[name = string("concat_371"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_167_internal_tensor_assign_1_stride_0 = const()[name = string("k_167_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_370, begin_mask = k_167_internal_tensor_assign_1_begin_mask_0, end = concat_371, end_mask = k_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_167_internal_tensor_assign_1_squeeze_mask_0, stride = k_167_internal_tensor_assign_1_stride_0, update = k_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("k_167_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_372 = const()[name = string("concat_372"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_373 = const()[name = string("concat_373"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_167_internal_tensor_assign_1_stride_0 = const()[name = string("v_167_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_372, begin_mask = v_167_internal_tensor_assign_1_begin_mask_0, end = concat_373, end_mask = v_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_167_internal_tensor_assign_1_squeeze_mask_0, stride = v_167_internal_tensor_assign_1_stride_0, update = v_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("v_167_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_374x = const()[name = string("concat_374x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3693_cast_fp16 = reshape(shape = concat_374x, x = linear_132_cast_fp16)[name = string("op_3693_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_226_to_fp16 = const()[name = string("const_226_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_135_cast_fp16 = mul(x = var_3693_cast_fp16, y = const_226_to_fp16)[name = string("q_135_cast_fp16")];
+            tensor<int32, [4]> var_3699 = const()[name = string("op_3699"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3700_cast_fp16 = reshape(shape = var_3699, x = k_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3700_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_169_cast_fp16 = mul(x = var_3700_cast_fp16, y = const_227_to_fp16)[name = string("k_169_cast_fp16")];
+            tensor<int32, [4]> var_3706 = const()[name = string("op_3706"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3707_cast_fp16 = reshape(shape = var_3706, x = v_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3707_cast_fp16")];
+            tensor<int32, [4]> var_3708 = const()[name = string("op_3708"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_101_transpose_x_0 = const()[name = string("qk_101_transpose_x_0"), val = bool(false)];
+            bool qk_101_transpose_y_0 = const()[name = string("qk_101_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_323_perm_0 = const()[name = string("transpose_323_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_324_perm_0 = const()[name = string("transpose_324_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_324 = transpose(perm = transpose_324_perm_0, x = k_169_cast_fp16)[name = string("transpose_506")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_323 = transpose(perm = transpose_323_perm_0, x = q_135_cast_fp16)[name = string("transpose_507")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_101_cast_fp16 = matmul(transpose_x = qk_101_transpose_x_0, transpose_y = qk_101_transpose_y_0, x = transpose_323, y = transpose_324)[name = string("qk_101_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3712_cast_fp16 = softmax(axis = var_3556, x = qk_101_cast_fp16)[name = string("op_3712_cast_fp16")];
+            bool var_3714_transpose_x_0 = const()[name = string("op_3714_transpose_x_0"), val = bool(false)];
+            bool var_3714_transpose_y_0 = const()[name = string("op_3714_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_169_cast_fp16 = transpose(perm = var_3708, x = var_3707_cast_fp16)[name = string("transpose_508")];
+            tensor<fp16, [1, 20, ?, 64]> var_3714_cast_fp16 = matmul(transpose_x = var_3714_transpose_x_0, transpose_y = var_3714_transpose_y_0, x = var_3712_cast_fp16, y = v_169_cast_fp16)[name = string("op_3714_cast_fp16")];
+            tensor<int32, [4]> var_3715 = const()[name = string("op_3715"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_375x = const()[name = string("concat_375x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3716_cast_fp16 = transpose(perm = var_3715, x = var_3714_cast_fp16)[name = string("transpose_505")];
+            tensor<fp16, [1, ?, 1280]> x_301_cast_fp16 = reshape(shape = concat_375x, x = var_3716_cast_fp16)[name = string("x_301_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3720_to_fp16 = const()[name = string("op_3720_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889251008)))];
+            tensor<fp16, [1280]> var_3721_to_fp16 = const()[name = string("op_3721_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892527872)))];
+            tensor<fp16, [1, ?, 1280]> linear_133_cast_fp16 = linear(bias = var_3721_to_fp16, weight = var_3720_to_fp16, x = x_301_cast_fp16)[name = string("linear_133_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_303_cast_fp16 = add(x = x_297_cast_fp16, y = linear_133_cast_fp16)[name = string("x_303_cast_fp16")];
+            tensor<int32, [1]> var_3728_axes_0 = const()[name = string("op_3728_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892530496)))];
+            tensor<fp16, [1280]> blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892533120)))];
+            tensor<fp16, [1, ?, 1280]> var_3728_cast_fp16 = layer_norm(axes = var_3728_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_303_cast_fp16)[name = string("op_3728_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3737_to_fp16 = const()[name = string("op_3737_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892535744)))];
+            tensor<fp16, [5120]> var_3738_to_fp16 = const()[name = string("op_3738_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905643008)))];
+            tensor<fp16, [1, ?, 5120]> linear_134_cast_fp16 = linear(bias = var_3738_to_fp16, weight = var_3737_to_fp16, x = var_3728_cast_fp16)[name = string("linear_134_cast_fp16")];
+            string x_307_mode_0 = const()[name = string("x_307_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_307_cast_fp16 = gelu(mode = x_307_mode_0, x = linear_134_cast_fp16)[name = string("x_307_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3743_to_fp16 = const()[name = string("op_3743_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905653312)))];
+            tensor<fp16, [1280]> var_3744_to_fp16 = const()[name = string("op_3744_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918760576)))];
+            tensor<fp16, [1, ?, 1280]> linear_135_cast_fp16 = linear(bias = var_3744_to_fp16, weight = var_3743_to_fp16, x = x_307_cast_fp16)[name = string("linear_135_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_309_cast_fp16 = add(x = x_303_cast_fp16, y = linear_135_cast_fp16)[name = string("x_309_cast_fp16")];
+            tensor<int32, [4]> k_cache_69_begin_0 = const()[name = string("k_cache_69_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_69_end_0 = const()[name = string("k_cache_69_end_0"), val = tensor<int32, [4]>([18, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_69_end_mask_0 = const()[name = string("k_cache_69_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_69_squeeze_mask_0 = const()[name = string("k_cache_69_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_69_cast_fp16 = slice_by_index(begin = k_cache_69_begin_0, end = k_cache_69_end_0, end_mask = k_cache_69_end_mask_0, squeeze_mask = k_cache_69_squeeze_mask_0, x = coreml_update_state_96)[name = string("k_cache_69_cast_fp16")];
+            tensor<int32, [4]> v_cache_69_begin_0 = const()[name = string("v_cache_69_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_69_end_0 = const()[name = string("v_cache_69_end_0"), val = tensor<int32, [4]>([18, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_69_end_mask_0 = const()[name = string("v_cache_69_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_69_squeeze_mask_0 = const()[name = string("v_cache_69_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_69_cast_fp16 = slice_by_index(begin = v_cache_69_begin_0, end = v_cache_69_end_0, end_mask = v_cache_69_end_mask_0, squeeze_mask = v_cache_69_squeeze_mask_0, x = coreml_update_state_97)[name = string("v_cache_69_cast_fp16")];
+            tensor<int32, [4]> k_cache_71_begin_0 = const()[name = string("k_cache_71_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_71_end_0 = const()[name = string("k_cache_71_end_0"), val = tensor<int32, [4]>([18, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_71_end_mask_0 = const()[name = string("k_cache_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_71_squeeze_mask_0 = const()[name = string("k_cache_71_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_71_cast_fp16 = slice_by_index(begin = k_cache_71_begin_0, end = k_cache_71_end_0, end_mask = k_cache_71_end_mask_0, squeeze_mask = k_cache_71_squeeze_mask_0, x = read_state_2)[name = string("k_cache_71_cast_fp16")];
+            tensor<int32, [4]> v_cache_71_begin_0 = const()[name = string("v_cache_71_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_71_end_0 = const()[name = string("v_cache_71_end_0"), val = tensor<int32, [4]>([18, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_71_end_mask_0 = const()[name = string("v_cache_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_71_squeeze_mask_0 = const()[name = string("v_cache_71_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_71_cast_fp16 = slice_by_index(begin = v_cache_71_begin_0, end = v_cache_71_end_0, end_mask = v_cache_71_end_mask_0, squeeze_mask = v_cache_71_squeeze_mask_0, x = read_state_3)[name = string("v_cache_71_cast_fp16")];
+            int32 var_3767 = const()[name = string("op_3767"), val = int32(-1)];
+            tensor<int32, [1]> var_3785_axes_0 = const()[name = string("op_3785_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918763200)))];
+            tensor<fp16, [1280]> blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918765824)))];
+            fp16 var_3773_to_fp16 = const()[name = string("op_3773_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3785_cast_fp16 = layer_norm(axes = var_3785_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_309_cast_fp16)[name = string("op_3785_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3796_to_fp16 = const()[name = string("op_3796_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918768448)))];
+            tensor<fp16, [1280]> var_3797_to_fp16 = const()[name = string("op_3797_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922045312)))];
+            tensor<fp16, [1, ?, 1280]> linear_136_cast_fp16 = linear(bias = var_3797_to_fp16, weight = var_3796_to_fp16, x = var_3785_cast_fp16)[name = string("linear_136_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3800_to_fp16 = const()[name = string("op_3800_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922047936)))];
+            tensor<fp16, [1, ?, 1280]> linear_137_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3800_to_fp16, x = var_3785_cast_fp16)[name = string("linear_137_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3804_to_fp16 = const()[name = string("op_3804_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(925324800)))];
+            tensor<fp16, [1280]> var_3805_to_fp16 = const()[name = string("op_3805_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928601664)))];
+            tensor<fp16, [1, ?, 1280]> linear_138_cast_fp16 = linear(bias = var_3805_to_fp16, weight = var_3804_to_fp16, x = var_3785_cast_fp16)[name = string("linear_138_cast_fp16")];
+            tensor<int32, [3]> var_3807_shape_cast_fp16 = shape(x = linear_136_cast_fp16)[name = string("op_3807_shape_cast_fp16")];
+            int32 gather_206_axis_0 = const()[name = string("gather_206_axis_0"), val = int32(0)];
+            int32 gather_206_batch_dims_0 = const()[name = string("gather_206_batch_dims_0"), val = int32(0)];
+            bool gather_206_validate_indices_0 = const()[name = string("gather_206_validate_indices_0"), val = bool(false)];
+            string var_3807_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3807_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_206_to_uint16 = const()[name = string("select_206_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3807_shape_cast_fp16_to_uint16 = cast(dtype = var_3807_shape_cast_fp16_to_uint16_dtype_0, x = var_3807_shape_cast_fp16)[name = string("cast_356")];
+            uint16 gather_206_cast_uint16 = gather(axis = gather_206_axis_0, batch_dims = gather_206_batch_dims_0, indices = select_206_to_uint16, validate_indices = gather_206_validate_indices_0, x = var_3807_shape_cast_fp16_to_uint16)[name = string("gather_206_cast_uint16")];
+            string gather_206_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_206_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_206_cast_uint16_to_int32 = cast(dtype = gather_206_cast_uint16_to_int32_dtype_0, x = gather_206_cast_uint16)[name = string("cast_355")];
+            int32 end_step_37 = add(x = offset, y = gather_206_cast_uint16_to_int32)[name = string("end_step_37")];
+            tensor<int32, [1]> expand_dims_272 = const()[name = string("expand_dims_272"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_275_axes_0 = const()[name = string("expand_dims_275_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_275 = expand_dims(axes = expand_dims_275_axes_0, x = end_step_37)[name = string("expand_dims_275")];
+            tensor<int32, [1]> concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = tensor<int32, [1]>([17])];
+            int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)];
+            bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, expand_dims_272, expand_dims_1, expand_dims_274))[name = string("concat_378")];
+            tensor<int32, [1]> concat_379_values0_0 = const()[name = string("concat_379_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_379_values1_0 = const()[name = string("concat_379_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_379_values3_0 = const()[name = string("concat_379_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)];
+            bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (concat_379_values0_0, concat_379_values1_0, expand_dims_275, concat_379_values3_0))[name = string("concat_379")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = k_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = k_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_18_stride_0, update = linear_137_cast_fp16, x = coreml_update_state_96)[name = string("k_cache1_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_18_cast_fp16, input = k_cache1)[name = string("coreml_update_state_98_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_98 = read_state(input = k_cache1)[name = string("coreml_update_state_98")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = v_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = v_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_18_stride_0, update = linear_138_cast_fp16, x = coreml_update_state_97)[name = string("v_cache1_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_18_cast_fp16, input = v_cache1)[name = string("coreml_update_state_99_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_99 = read_state(input = v_cache1)[name = string("coreml_update_state_99")];
+            int32 concat_384_values0_0 = const()[name = string("concat_384_values0_0"), val = int32(1)];
+            int32 concat_384_values2_0 = const()[name = string("concat_384_values2_0"), val = int32(1280)];
+            int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)];
+            bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (concat_384_values0_0, end_step_37, concat_384_values2_0))[name = string("concat_384")];
+            tensor<int32, [3]> var_3823_begin_0 = const()[name = string("op_3823_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3823_end_mask_0 = const()[name = string("op_3823_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3823_cast_fp16 = slice_by_index(begin = var_3823_begin_0, end = concat_384, end_mask = var_3823_end_mask_0, x = k_cache_69_cast_fp16)[name = string("op_3823_cast_fp16")];
+            tensor<int32, [3]> var_3826_begin_0 = const()[name = string("op_3826_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3826_end_mask_0 = const()[name = string("op_3826_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3826_cast_fp16 = slice_by_index(begin = var_3826_begin_0, end = concat_384, end_mask = var_3826_end_mask_0, x = v_cache_69_cast_fp16)[name = string("op_3826_cast_fp16")];
+            tensor<int32, [4]> concat_386x = const()[name = string("concat_386x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3836_cast_fp16 = reshape(shape = concat_386x, x = linear_136_cast_fp16)[name = string("op_3836_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_228_to_fp16 = const()[name = string("const_228_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_139_cast_fp16 = mul(x = var_3836_cast_fp16, y = const_228_to_fp16)[name = string("q_139_cast_fp16")];
+            tensor<int32, [4]> concat_387x = const()[name = string("concat_387x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3843_cast_fp16 = reshape(shape = concat_387x, x = var_3823_cast_fp16)[name = string("op_3843_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_175_cast_fp16 = mul(x = var_3843_cast_fp16, y = const_229_to_fp16)[name = string("k_175_cast_fp16")];
+            tensor<int32, [4]> concat_388x = const()[name = string("concat_388x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3850_cast_fp16 = reshape(shape = concat_388x, x = var_3826_cast_fp16)[name = string("op_3850_cast_fp16")];
+            tensor<int32, [4]> var_3851 = const()[name = string("op_3851"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_103_transpose_x_0 = const()[name = string("qk_103_transpose_x_0"), val = bool(false)];
+            bool qk_103_transpose_y_0 = const()[name = string("qk_103_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_325_perm_0 = const()[name = string("transpose_325_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_326_perm_0 = const()[name = string("transpose_326_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_326 = transpose(perm = transpose_326_perm_0, x = k_175_cast_fp16)[name = string("transpose_502")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_325 = transpose(perm = transpose_325_perm_0, x = q_139_cast_fp16)[name = string("transpose_503")];
+            tensor<fp16, [1, 20, ?, ?]> qk_103_cast_fp16 = matmul(transpose_x = qk_103_transpose_x_0, transpose_y = qk_103_transpose_y_0, x = transpose_325, y = transpose_326)[name = string("qk_103_cast_fp16")];
+            int32 concat_389_values1_0 = const()[name = string("concat_389_values1_0"), val = int32(448)];
+            int32 concat_389_axis_0 = const()[name = string("concat_389_axis_0"), val = int32(0)];
+            bool concat_389_interleave_0 = const()[name = string("concat_389_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_389 = concat(axis = concat_389_axis_0, interleave = concat_389_interleave_0, values = (gather_206_cast_uint16_to_int32, concat_389_values1_0))[name = string("concat_389")];
+            tensor<int32, [2]> var_3854_begin_0 = const()[name = string("op_3854_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3854_end_mask_0 = const()[name = string("op_3854_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3854_cast_fp16 = slice_by_index(begin = var_3854_begin_0, end = concat_389, end_mask = var_3854_end_mask_0, x = mask_to_fp16)[name = string("op_3854_cast_fp16")];
+            int32 concat_390_values0_0 = const()[name = string("concat_390_values0_0"), val = int32(0)];
+            int32 concat_390_axis_0 = const()[name = string("concat_390_axis_0"), val = int32(0)];
+            bool concat_390_interleave_0 = const()[name = string("concat_390_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_390 = concat(axis = concat_390_axis_0, interleave = concat_390_interleave_0, values = (concat_390_values0_0, gather_206_cast_uint16_to_int32))[name = string("concat_390")];
+            tensor<int32, [2]> var_3855_begin_0 = const()[name = string("op_3855_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3855_end_mask_0 = const()[name = string("op_3855_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3855_cast_fp16 = slice_by_index(begin = var_3855_begin_0, end = concat_390, end_mask = var_3855_end_mask_0, x = var_3854_cast_fp16)[name = string("op_3855_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_105_cast_fp16 = add(x = qk_103_cast_fp16, y = var_3855_cast_fp16)[name = string("qk_105_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3858_cast_fp16 = softmax(axis = var_3767, x = qk_105_cast_fp16)[name = string("op_3858_cast_fp16")];
+            bool var_3860_transpose_x_0 = const()[name = string("op_3860_transpose_x_0"), val = bool(false)];
+            bool var_3860_transpose_y_0 = const()[name = string("op_3860_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_175_cast_fp16 = transpose(perm = var_3851, x = var_3850_cast_fp16)[name = string("transpose_504")];
+            tensor<fp16, [1, 20, ?, 64]> var_3860_cast_fp16 = matmul(transpose_x = var_3860_transpose_x_0, transpose_y = var_3860_transpose_y_0, x = var_3858_cast_fp16, y = v_175_cast_fp16)[name = string("op_3860_cast_fp16")];
+            tensor<int32, [4]> var_3861 = const()[name = string("op_3861"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_391x = const()[name = string("concat_391x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3862_cast_fp16 = transpose(perm = var_3861, x = var_3860_cast_fp16)[name = string("transpose_501")];
+            tensor<fp16, [1, ?, 1280]> x_313_cast_fp16 = reshape(shape = concat_391x, x = var_3862_cast_fp16)[name = string("x_313_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3866_to_fp16 = const()[name = string("op_3866_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928604288)))];
+            tensor<fp16, [1280]> var_3867_to_fp16 = const()[name = string("op_3867_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931881152)))];
+            tensor<fp16, [1, ?, 1280]> linear_139_cast_fp16 = linear(bias = var_3867_to_fp16, weight = var_3866_to_fp16, x = x_313_cast_fp16)[name = string("linear_139_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_315_cast_fp16 = add(x = x_309_cast_fp16, y = linear_139_cast_fp16)[name = string("x_315_cast_fp16")];
+            tensor<int32, [1]> var_3874_axes_0 = const()[name = string("op_3874_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931883776)))];
+            tensor<fp16, [1280]> blocks_17_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931886400)))];
+            tensor<fp16, [1, ?, 1280]> var_3874_cast_fp16 = layer_norm(axes = var_3874_axes_0, beta = blocks_17_cross_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_cross_attn_ln_weight_to_fp16, x = x_315_cast_fp16)[name = string("op_3874_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3883_to_fp16 = const()[name = string("op_3883_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931889024)))];
+            tensor<fp16, [1280]> var_3884_to_fp16 = const()[name = string("op_3884_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935165888)))];
+            tensor<fp16, [1, ?, 1280]> linear_140_cast_fp16 = linear(bias = var_3884_to_fp16, weight = var_3883_to_fp16, x = var_3874_cast_fp16)[name = string("linear_140_cast_fp16")];
+            tensor<int32, [3]> concat_392 = const()[name = string("concat_392"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_393 = const()[name = string("concat_393"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_177_internal_tensor_assign_1_stride_0 = const()[name = string("k_177_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_392, begin_mask = k_177_internal_tensor_assign_1_begin_mask_0, end = concat_393, end_mask = k_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_177_internal_tensor_assign_1_squeeze_mask_0, stride = k_177_internal_tensor_assign_1_stride_0, update = k_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("k_177_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_394 = const()[name = string("concat_394"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_395 = const()[name = string("concat_395"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_177_internal_tensor_assign_1_stride_0 = const()[name = string("v_177_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_394, begin_mask = v_177_internal_tensor_assign_1_begin_mask_0, end = concat_395, end_mask = v_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_177_internal_tensor_assign_1_squeeze_mask_0, stride = v_177_internal_tensor_assign_1_stride_0, update = v_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("v_177_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_396x = const()[name = string("concat_396x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3904_cast_fp16 = reshape(shape = concat_396x, x = linear_140_cast_fp16)[name = string("op_3904_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_143_cast_fp16 = mul(x = var_3904_cast_fp16, y = const_230_to_fp16)[name = string("q_143_cast_fp16")];
+            tensor<int32, [4]> var_3910 = const()[name = string("op_3910"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3911_cast_fp16 = reshape(shape = var_3910, x = k_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3911_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_179_cast_fp16 = mul(x = var_3911_cast_fp16, y = const_231_to_fp16)[name = string("k_179_cast_fp16")];
+            tensor<int32, [4]> var_3917 = const()[name = string("op_3917"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3918_cast_fp16 = reshape(shape = var_3917, x = v_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3918_cast_fp16")];
+            tensor<int32, [4]> var_3919 = const()[name = string("op_3919"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_107_transpose_x_0 = const()[name = string("qk_107_transpose_x_0"), val = bool(false)];
+            bool qk_107_transpose_y_0 = const()[name = string("qk_107_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_327_perm_0 = const()[name = string("transpose_327_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_328_perm_0 = const()[name = string("transpose_328_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_328 = transpose(perm = transpose_328_perm_0, x = k_179_cast_fp16)[name = string("transpose_498")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_327 = transpose(perm = transpose_327_perm_0, x = q_143_cast_fp16)[name = string("transpose_499")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_107_cast_fp16 = matmul(transpose_x = qk_107_transpose_x_0, transpose_y = qk_107_transpose_y_0, x = transpose_327, y = transpose_328)[name = string("qk_107_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3923_cast_fp16 = softmax(axis = var_3767, x = qk_107_cast_fp16)[name = string("op_3923_cast_fp16")];
+            bool var_3925_transpose_x_0 = const()[name = string("op_3925_transpose_x_0"), val = bool(false)];
+            bool var_3925_transpose_y_0 = const()[name = string("op_3925_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_179_cast_fp16 = transpose(perm = var_3919, x = var_3918_cast_fp16)[name = string("transpose_500")];
+            tensor<fp16, [1, 20, ?, 64]> var_3925_cast_fp16 = matmul(transpose_x = var_3925_transpose_x_0, transpose_y = var_3925_transpose_y_0, x = var_3923_cast_fp16, y = v_179_cast_fp16)[name = string("op_3925_cast_fp16")];
+            tensor<int32, [4]> var_3926 = const()[name = string("op_3926"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_397x = const()[name = string("concat_397x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3927_cast_fp16 = transpose(perm = var_3926, x = var_3925_cast_fp16)[name = string("transpose_497")];
+            tensor<fp16, [1, ?, 1280]> x_319_cast_fp16 = reshape(shape = concat_397x, x = var_3927_cast_fp16)[name = string("x_319_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3931_to_fp16 = const()[name = string("op_3931_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935168512)))];
+            tensor<fp16, [1280]> var_3932_to_fp16 = const()[name = string("op_3932_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938445376)))];
+            tensor<fp16, [1, ?, 1280]> linear_141_cast_fp16 = linear(bias = var_3932_to_fp16, weight = var_3931_to_fp16, x = x_319_cast_fp16)[name = string("linear_141_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_321_cast_fp16 = add(x = x_315_cast_fp16, y = linear_141_cast_fp16)[name = string("x_321_cast_fp16")];
+            tensor<int32, [1]> var_3939_axes_0 = const()[name = string("op_3939_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938448000)))];
+            tensor<fp16, [1280]> blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938450624)))];
+            tensor<fp16, [1, ?, 1280]> var_3939_cast_fp16 = layer_norm(axes = var_3939_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_321_cast_fp16)[name = string("op_3939_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3948_to_fp16 = const()[name = string("op_3948_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938453248)))];
+            tensor<fp16, [5120]> var_3949_to_fp16 = const()[name = string("op_3949_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951560512)))];
+            tensor<fp16, [1, ?, 5120]> linear_142_cast_fp16 = linear(bias = var_3949_to_fp16, weight = var_3948_to_fp16, x = var_3939_cast_fp16)[name = string("linear_142_cast_fp16")];
+            string x_325_mode_0 = const()[name = string("x_325_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_325_cast_fp16 = gelu(mode = x_325_mode_0, x = linear_142_cast_fp16)[name = string("x_325_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3954_to_fp16 = const()[name = string("op_3954_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951570816)))];
+            tensor<fp16, [1280]> var_3955_to_fp16 = const()[name = string("op_3955_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964678080)))];
+            tensor<fp16, [1, ?, 1280]> linear_143_cast_fp16 = linear(bias = var_3955_to_fp16, weight = var_3954_to_fp16, x = x_325_cast_fp16)[name = string("linear_143_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_327_cast_fp16 = add(x = x_321_cast_fp16, y = linear_143_cast_fp16)[name = string("x_327_cast_fp16")];
+            tensor<int32, [4]> k_cache_73_begin_0 = const()[name = string("k_cache_73_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_73_end_0 = const()[name = string("k_cache_73_end_0"), val = tensor<int32, [4]>([19, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_73_end_mask_0 = const()[name = string("k_cache_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_73_squeeze_mask_0 = const()[name = string("k_cache_73_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_73_cast_fp16 = slice_by_index(begin = k_cache_73_begin_0, end = k_cache_73_end_0, end_mask = k_cache_73_end_mask_0, squeeze_mask = k_cache_73_squeeze_mask_0, x = coreml_update_state_98)[name = string("k_cache_73_cast_fp16")];
+            tensor<int32, [4]> v_cache_73_begin_0 = const()[name = string("v_cache_73_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_73_end_0 = const()[name = string("v_cache_73_end_0"), val = tensor<int32, [4]>([19, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_73_end_mask_0 = const()[name = string("v_cache_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_73_squeeze_mask_0 = const()[name = string("v_cache_73_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_73_cast_fp16 = slice_by_index(begin = v_cache_73_begin_0, end = v_cache_73_end_0, end_mask = v_cache_73_end_mask_0, squeeze_mask = v_cache_73_squeeze_mask_0, x = coreml_update_state_99)[name = string("v_cache_73_cast_fp16")];
+            tensor<int32, [4]> k_cache_75_begin_0 = const()[name = string("k_cache_75_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_75_end_0 = const()[name = string("k_cache_75_end_0"), val = tensor<int32, [4]>([19, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_75_end_mask_0 = const()[name = string("k_cache_75_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_75_squeeze_mask_0 = const()[name = string("k_cache_75_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_75_cast_fp16 = slice_by_index(begin = k_cache_75_begin_0, end = k_cache_75_end_0, end_mask = k_cache_75_end_mask_0, squeeze_mask = k_cache_75_squeeze_mask_0, x = read_state_2)[name = string("k_cache_75_cast_fp16")];
+            tensor<int32, [4]> v_cache_75_begin_0 = const()[name = string("v_cache_75_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_75_end_0 = const()[name = string("v_cache_75_end_0"), val = tensor<int32, [4]>([19, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_75_end_mask_0 = const()[name = string("v_cache_75_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_75_squeeze_mask_0 = const()[name = string("v_cache_75_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_75_cast_fp16 = slice_by_index(begin = v_cache_75_begin_0, end = v_cache_75_end_0, end_mask = v_cache_75_end_mask_0, squeeze_mask = v_cache_75_squeeze_mask_0, x = read_state_3)[name = string("v_cache_75_cast_fp16")];
+            int32 var_3978 = const()[name = string("op_3978"), val = int32(-1)];
+            tensor<int32, [1]> var_3996_axes_0 = const()[name = string("op_3996_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964680704)))];
+            tensor<fp16, [1280]> blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964683328)))];
+            fp16 var_3984_to_fp16 = const()[name = string("op_3984_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3996_cast_fp16 = layer_norm(axes = var_3996_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_327_cast_fp16)[name = string("op_3996_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4007_to_fp16 = const()[name = string("op_4007_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964685952)))];
+            tensor<fp16, [1280]> var_4008_to_fp16 = const()[name = string("op_4008_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967962816)))];
+            tensor<fp16, [1, ?, 1280]> linear_144_cast_fp16 = linear(bias = var_4008_to_fp16, weight = var_4007_to_fp16, x = var_3996_cast_fp16)[name = string("linear_144_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4011_to_fp16 = const()[name = string("op_4011_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967965440)))];
+            tensor<fp16, [1, ?, 1280]> linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4011_to_fp16, x = var_3996_cast_fp16)[name = string("linear_145_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4015_to_fp16 = const()[name = string("op_4015_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971242304)))];
+            tensor<fp16, [1280]> var_4016_to_fp16 = const()[name = string("op_4016_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974519168)))];
+            tensor<fp16, [1, ?, 1280]> linear_146_cast_fp16 = linear(bias = var_4016_to_fp16, weight = var_4015_to_fp16, x = var_3996_cast_fp16)[name = string("linear_146_cast_fp16")];
+            tensor<int32, [3]> var_4018_shape_cast_fp16 = shape(x = linear_144_cast_fp16)[name = string("op_4018_shape_cast_fp16")];
+            int32 gather_218_axis_0 = const()[name = string("gather_218_axis_0"), val = int32(0)];
+            int32 gather_218_batch_dims_0 = const()[name = string("gather_218_batch_dims_0"), val = int32(0)];
+            bool gather_218_validate_indices_0 = const()[name = string("gather_218_validate_indices_0"), val = bool(false)];
+            string var_4018_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4018_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_218_to_uint16 = const()[name = string("select_218_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4018_shape_cast_fp16_to_uint16 = cast(dtype = var_4018_shape_cast_fp16_to_uint16_dtype_0, x = var_4018_shape_cast_fp16)[name = string("cast_354")];
+            uint16 gather_218_cast_uint16 = gather(axis = gather_218_axis_0, batch_dims = gather_218_batch_dims_0, indices = select_218_to_uint16, validate_indices = gather_218_validate_indices_0, x = var_4018_shape_cast_fp16_to_uint16)[name = string("gather_218_cast_uint16")];
+            string gather_218_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_218_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_218_cast_uint16_to_int32 = cast(dtype = gather_218_cast_uint16_to_int32_dtype_0, x = gather_218_cast_uint16)[name = string("cast_353")];
+            int32 end_step_39 = add(x = offset, y = gather_218_cast_uint16_to_int32)[name = string("end_step_39")];
+            tensor<int32, [1]> expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_290 = const()[name = string("expand_dims_290"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_291_axes_0 = const()[name = string("expand_dims_291_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_291 = expand_dims(axes = expand_dims_291_axes_0, x = end_step_39)[name = string("expand_dims_291")];
+            tensor<int32, [1]> concat_400_values0_0 = const()[name = string("concat_400_values0_0"), val = tensor<int32, [1]>([18])];
+            int32 concat_400_axis_0 = const()[name = string("concat_400_axis_0"), val = int32(0)];
+            bool concat_400_interleave_0 = const()[name = string("concat_400_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_400 = concat(axis = concat_400_axis_0, interleave = concat_400_interleave_0, values = (concat_400_values0_0, expand_dims_288, expand_dims_1, expand_dims_290))[name = string("concat_400")];
+            tensor<int32, [1]> concat_401_values0_0 = const()[name = string("concat_401_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_401_values1_0 = const()[name = string("concat_401_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_401_values3_0 = const()[name = string("concat_401_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_401_axis_0 = const()[name = string("concat_401_axis_0"), val = int32(0)];
+            bool concat_401_interleave_0 = const()[name = string("concat_401_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_401 = concat(axis = concat_401_axis_0, interleave = concat_401_interleave_0, values = (concat_401_values0_0, concat_401_values1_0, expand_dims_291, concat_401_values3_0))[name = string("concat_401")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = k_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = k_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_19_stride_0, update = linear_145_cast_fp16, x = coreml_update_state_98)[name = string("k_cache1_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_19_cast_fp16, input = k_cache1)[name = string("coreml_update_state_100_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_100 = read_state(input = k_cache1)[name = string("coreml_update_state_100")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = v_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = v_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_19_stride_0, update = linear_146_cast_fp16, x = coreml_update_state_99)[name = string("v_cache1_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_19_cast_fp16, input = v_cache1)[name = string("coreml_update_state_101_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_101 = read_state(input = v_cache1)[name = string("coreml_update_state_101")];
+            int32 concat_406_values0_0 = const()[name = string("concat_406_values0_0"), val = int32(1)];
+            int32 concat_406_values2_0 = const()[name = string("concat_406_values2_0"), val = int32(1280)];
+            int32 concat_406_axis_0 = const()[name = string("concat_406_axis_0"), val = int32(0)];
+            bool concat_406_interleave_0 = const()[name = string("concat_406_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_406 = concat(axis = concat_406_axis_0, interleave = concat_406_interleave_0, values = (concat_406_values0_0, end_step_39, concat_406_values2_0))[name = string("concat_406")];
+            tensor<int32, [3]> var_4034_begin_0 = const()[name = string("op_4034_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4034_end_mask_0 = const()[name = string("op_4034_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4034_cast_fp16 = slice_by_index(begin = var_4034_begin_0, end = concat_406, end_mask = var_4034_end_mask_0, x = k_cache_73_cast_fp16)[name = string("op_4034_cast_fp16")];
+            tensor<int32, [3]> var_4037_begin_0 = const()[name = string("op_4037_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4037_end_mask_0 = const()[name = string("op_4037_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4037_cast_fp16 = slice_by_index(begin = var_4037_begin_0, end = concat_406, end_mask = var_4037_end_mask_0, x = v_cache_73_cast_fp16)[name = string("op_4037_cast_fp16")];
+            tensor<int32, [4]> concat_408x = const()[name = string("concat_408x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4047_cast_fp16 = reshape(shape = concat_408x, x = linear_144_cast_fp16)[name = string("op_4047_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_147_cast_fp16 = mul(x = var_4047_cast_fp16, y = const_232_to_fp16)[name = string("q_147_cast_fp16")];
+            tensor<int32, [4]> concat_409x = const()[name = string("concat_409x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4054_cast_fp16 = reshape(shape = concat_409x, x = var_4034_cast_fp16)[name = string("op_4054_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_185_cast_fp16 = mul(x = var_4054_cast_fp16, y = const_233_to_fp16)[name = string("k_185_cast_fp16")];
+            tensor<int32, [4]> concat_410x = const()[name = string("concat_410x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4061_cast_fp16 = reshape(shape = concat_410x, x = var_4037_cast_fp16)[name = string("op_4061_cast_fp16")];
+            tensor<int32, [4]> var_4062 = const()[name = string("op_4062"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_109_transpose_x_0 = const()[name = string("qk_109_transpose_x_0"), val = bool(false)];
+            bool qk_109_transpose_y_0 = const()[name = string("qk_109_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_329_perm_0 = const()[name = string("transpose_329_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_330_perm_0 = const()[name = string("transpose_330_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_330 = transpose(perm = transpose_330_perm_0, x = k_185_cast_fp16)[name = string("transpose_494")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_329 = transpose(perm = transpose_329_perm_0, x = q_147_cast_fp16)[name = string("transpose_495")];
+            tensor<fp16, [1, 20, ?, ?]> qk_109_cast_fp16 = matmul(transpose_x = qk_109_transpose_x_0, transpose_y = qk_109_transpose_y_0, x = transpose_329, y = transpose_330)[name = string("qk_109_cast_fp16")];
+            int32 concat_411_values1_0 = const()[name = string("concat_411_values1_0"), val = int32(448)];
+            int32 concat_411_axis_0 = const()[name = string("concat_411_axis_0"), val = int32(0)];
+            bool concat_411_interleave_0 = const()[name = string("concat_411_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_411 = concat(axis = concat_411_axis_0, interleave = concat_411_interleave_0, values = (gather_218_cast_uint16_to_int32, concat_411_values1_0))[name = string("concat_411")];
+            tensor<int32, [2]> var_4065_begin_0 = const()[name = string("op_4065_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4065_end_mask_0 = const()[name = string("op_4065_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4065_cast_fp16 = slice_by_index(begin = var_4065_begin_0, end = concat_411, end_mask = var_4065_end_mask_0, x = mask_to_fp16)[name = string("op_4065_cast_fp16")];
+            int32 concat_412_values0_0 = const()[name = string("concat_412_values0_0"), val = int32(0)];
+            int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)];
+            bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (concat_412_values0_0, gather_218_cast_uint16_to_int32))[name = string("concat_412")];
+            tensor<int32, [2]> var_4066_begin_0 = const()[name = string("op_4066_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4066_end_mask_0 = const()[name = string("op_4066_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4066_cast_fp16 = slice_by_index(begin = var_4066_begin_0, end = concat_412, end_mask = var_4066_end_mask_0, x = var_4065_cast_fp16)[name = string("op_4066_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_111_cast_fp16 = add(x = qk_109_cast_fp16, y = var_4066_cast_fp16)[name = string("qk_111_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4069_cast_fp16 = softmax(axis = var_3978, x = qk_111_cast_fp16)[name = string("op_4069_cast_fp16")];
+            bool var_4071_transpose_x_0 = const()[name = string("op_4071_transpose_x_0"), val = bool(false)];
+            bool var_4071_transpose_y_0 = const()[name = string("op_4071_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_185_cast_fp16 = transpose(perm = var_4062, x = var_4061_cast_fp16)[name = string("transpose_496")];
+            tensor<fp16, [1, 20, ?, 64]> var_4071_cast_fp16 = matmul(transpose_x = var_4071_transpose_x_0, transpose_y = var_4071_transpose_y_0, x = var_4069_cast_fp16, y = v_185_cast_fp16)[name = string("op_4071_cast_fp16")];
+            tensor<int32, [4]> var_4072 = const()[name = string("op_4072"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_413x = const()[name = string("concat_413x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4073_cast_fp16 = transpose(perm = var_4072, x = var_4071_cast_fp16)[name = string("transpose_493")];
+            tensor<fp16, [1, ?, 1280]> x_331_cast_fp16 = reshape(shape = concat_413x, x = var_4073_cast_fp16)[name = string("x_331_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4077_to_fp16 = const()[name = string("op_4077_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974521792)))];
+            tensor<fp16, [1280]> var_4078_to_fp16 = const()[name = string("op_4078_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977798656)))];
+            tensor<fp16, [1, ?, 1280]> linear_147_cast_fp16 = linear(bias = var_4078_to_fp16, weight = var_4077_to_fp16, x = x_331_cast_fp16)[name = string("linear_147_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_333_cast_fp16 = add(x = x_327_cast_fp16, y = linear_147_cast_fp16)[name = string("x_333_cast_fp16")];
+            tensor<int32, [1]> var_4085_axes_0 = const()[name = string("op_4085_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977801280)))];
+            tensor<fp16, [1280]> blocks_18_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977803904)))];
+            tensor<fp16, [1, ?, 1280]> var_4085_cast_fp16 = layer_norm(axes = var_4085_axes_0, beta = blocks_18_cross_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_cross_attn_ln_weight_to_fp16, x = x_333_cast_fp16)[name = string("op_4085_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4094_to_fp16 = const()[name = string("op_4094_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977806528)))];
+            tensor<fp16, [1280]> var_4095_to_fp16 = const()[name = string("op_4095_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981083392)))];
+            tensor<fp16, [1, ?, 1280]> linear_148_cast_fp16 = linear(bias = var_4095_to_fp16, weight = var_4094_to_fp16, x = var_4085_cast_fp16)[name = string("linear_148_cast_fp16")];
+            tensor<int32, [3]> concat_414 = const()[name = string("concat_414"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_415 = const()[name = string("concat_415"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_187_internal_tensor_assign_1_stride_0 = const()[name = string("k_187_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_414, begin_mask = k_187_internal_tensor_assign_1_begin_mask_0, end = concat_415, end_mask = k_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_187_internal_tensor_assign_1_squeeze_mask_0, stride = k_187_internal_tensor_assign_1_stride_0, update = k_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("k_187_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_416 = const()[name = string("concat_416"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_417 = const()[name = string("concat_417"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_187_internal_tensor_assign_1_stride_0 = const()[name = string("v_187_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_416, begin_mask = v_187_internal_tensor_assign_1_begin_mask_0, end = concat_417, end_mask = v_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_187_internal_tensor_assign_1_squeeze_mask_0, stride = v_187_internal_tensor_assign_1_stride_0, update = v_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("v_187_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_418x = const()[name = string("concat_418x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4115_cast_fp16 = reshape(shape = concat_418x, x = linear_148_cast_fp16)[name = string("op_4115_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_151_cast_fp16 = mul(x = var_4115_cast_fp16, y = const_234_to_fp16)[name = string("q_151_cast_fp16")];
+            tensor<int32, [4]> var_4121 = const()[name = string("op_4121"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4122_cast_fp16 = reshape(shape = var_4121, x = k_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4122_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_189_cast_fp16 = mul(x = var_4122_cast_fp16, y = const_235_to_fp16)[name = string("k_189_cast_fp16")];
+            tensor<int32, [4]> var_4128 = const()[name = string("op_4128"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4129_cast_fp16 = reshape(shape = var_4128, x = v_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4129_cast_fp16")];
+            tensor<int32, [4]> var_4130 = const()[name = string("op_4130"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_113_transpose_x_0 = const()[name = string("qk_113_transpose_x_0"), val = bool(false)];
+            bool qk_113_transpose_y_0 = const()[name = string("qk_113_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_331_perm_0 = const()[name = string("transpose_331_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_332_perm_0 = const()[name = string("transpose_332_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_332 = transpose(perm = transpose_332_perm_0, x = k_189_cast_fp16)[name = string("transpose_490")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_331 = transpose(perm = transpose_331_perm_0, x = q_151_cast_fp16)[name = string("transpose_491")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_113_cast_fp16 = matmul(transpose_x = qk_113_transpose_x_0, transpose_y = qk_113_transpose_y_0, x = transpose_331, y = transpose_332)[name = string("qk_113_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4134_cast_fp16 = softmax(axis = var_3978, x = qk_113_cast_fp16)[name = string("op_4134_cast_fp16")];
+            bool var_4136_transpose_x_0 = const()[name = string("op_4136_transpose_x_0"), val = bool(false)];
+            bool var_4136_transpose_y_0 = const()[name = string("op_4136_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_189_cast_fp16 = transpose(perm = var_4130, x = var_4129_cast_fp16)[name = string("transpose_492")];
+            tensor<fp16, [1, 20, ?, 64]> var_4136_cast_fp16 = matmul(transpose_x = var_4136_transpose_x_0, transpose_y = var_4136_transpose_y_0, x = var_4134_cast_fp16, y = v_189_cast_fp16)[name = string("op_4136_cast_fp16")];
+            tensor<int32, [4]> var_4137 = const()[name = string("op_4137"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_419x = const()[name = string("concat_419x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4138_cast_fp16 = transpose(perm = var_4137, x = var_4136_cast_fp16)[name = string("transpose_489")];
+            tensor<fp16, [1, ?, 1280]> x_337_cast_fp16 = reshape(shape = concat_419x, x = var_4138_cast_fp16)[name = string("x_337_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4142_to_fp16 = const()[name = string("op_4142_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981086016)))];
+            tensor<fp16, [1280]> var_4143_to_fp16 = const()[name = string("op_4143_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984362880)))];
+            tensor<fp16, [1, ?, 1280]> linear_149_cast_fp16 = linear(bias = var_4143_to_fp16, weight = var_4142_to_fp16, x = x_337_cast_fp16)[name = string("linear_149_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_339_cast_fp16 = add(x = x_333_cast_fp16, y = linear_149_cast_fp16)[name = string("x_339_cast_fp16")];
+            tensor<int32, [1]> var_4150_axes_0 = const()[name = string("op_4150_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984365504)))];
+            tensor<fp16, [1280]> blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984368128)))];
+            tensor<fp16, [1, ?, 1280]> var_4150_cast_fp16 = layer_norm(axes = var_4150_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_339_cast_fp16)[name = string("op_4150_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_4159_to_fp16 = const()[name = string("op_4159_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984370752)))];
+            tensor<fp16, [5120]> var_4160_to_fp16 = const()[name = string("op_4160_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997478016)))];
+            tensor<fp16, [1, ?, 5120]> linear_150_cast_fp16 = linear(bias = var_4160_to_fp16, weight = var_4159_to_fp16, x = var_4150_cast_fp16)[name = string("linear_150_cast_fp16")];
+            string x_343_mode_0 = const()[name = string("x_343_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_343_cast_fp16 = gelu(mode = x_343_mode_0, x = linear_150_cast_fp16)[name = string("x_343_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_4165_to_fp16 = const()[name = string("op_4165_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997488320)))];
+            tensor<fp16, [1280]> var_4166_to_fp16 = const()[name = string("op_4166_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010595584)))];
+            tensor<fp16, [1, ?, 1280]> linear_151_cast_fp16 = linear(bias = var_4166_to_fp16, weight = var_4165_to_fp16, x = x_343_cast_fp16)[name = string("linear_151_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_345_cast_fp16 = add(x = x_339_cast_fp16, y = linear_151_cast_fp16)[name = string("x_345_cast_fp16")];
+            tensor<int32, [4]> k_cache_77_begin_0 = const()[name = string("k_cache_77_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_77_end_0 = const()[name = string("k_cache_77_end_0"), val = tensor<int32, [4]>([20, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_77_end_mask_0 = const()[name = string("k_cache_77_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_77_squeeze_mask_0 = const()[name = string("k_cache_77_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_77_cast_fp16 = slice_by_index(begin = k_cache_77_begin_0, end = k_cache_77_end_0, end_mask = k_cache_77_end_mask_0, squeeze_mask = k_cache_77_squeeze_mask_0, x = coreml_update_state_100)[name = string("k_cache_77_cast_fp16")];
+            tensor<int32, [4]> v_cache_77_begin_0 = const()[name = string("v_cache_77_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_77_end_0 = const()[name = string("v_cache_77_end_0"), val = tensor<int32, [4]>([20, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_77_end_mask_0 = const()[name = string("v_cache_77_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_77_squeeze_mask_0 = const()[name = string("v_cache_77_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_77_cast_fp16 = slice_by_index(begin = v_cache_77_begin_0, end = v_cache_77_end_0, end_mask = v_cache_77_end_mask_0, squeeze_mask = v_cache_77_squeeze_mask_0, x = coreml_update_state_101)[name = string("v_cache_77_cast_fp16")];
+            tensor<int32, [4]> k_cache_79_begin_0 = const()[name = string("k_cache_79_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_79_end_0 = const()[name = string("k_cache_79_end_0"), val = tensor<int32, [4]>([20, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_79_end_mask_0 = const()[name = string("k_cache_79_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_79_squeeze_mask_0 = const()[name = string("k_cache_79_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_79_cast_fp16 = slice_by_index(begin = k_cache_79_begin_0, end = k_cache_79_end_0, end_mask = k_cache_79_end_mask_0, squeeze_mask = k_cache_79_squeeze_mask_0, x = read_state_2)[name = string("k_cache_79_cast_fp16")];
+            tensor<int32, [4]> v_cache_79_begin_0 = const()[name = string("v_cache_79_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_79_end_0 = const()[name = string("v_cache_79_end_0"), val = tensor<int32, [4]>([20, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_79_end_mask_0 = const()[name = string("v_cache_79_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_79_squeeze_mask_0 = const()[name = string("v_cache_79_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_79_cast_fp16 = slice_by_index(begin = v_cache_79_begin_0, end = v_cache_79_end_0, end_mask = v_cache_79_end_mask_0, squeeze_mask = v_cache_79_squeeze_mask_0, x = read_state_3)[name = string("v_cache_79_cast_fp16")];
+            int32 var_4189 = const()[name = string("op_4189"), val = int32(-1)];
+            tensor<int32, [1]> var_4207_axes_0 = const()[name = string("op_4207_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010598208)))];
+            tensor<fp16, [1280]> blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010600832)))];
+            fp16 var_4195_to_fp16 = const()[name = string("op_4195_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_4207_cast_fp16 = layer_norm(axes = var_4207_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_345_cast_fp16)[name = string("op_4207_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4218_to_fp16 = const()[name = string("op_4218_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010603456)))];
+            tensor<fp16, [1280]> var_4219_to_fp16 = const()[name = string("op_4219_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013880320)))];
+            tensor<fp16, [1, ?, 1280]> linear_152_cast_fp16 = linear(bias = var_4219_to_fp16, weight = var_4218_to_fp16, x = var_4207_cast_fp16)[name = string("linear_152_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4222_to_fp16 = const()[name = string("op_4222_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013882944)))];
+            tensor<fp16, [1, ?, 1280]> linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4222_to_fp16, x = var_4207_cast_fp16)[name = string("linear_153_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4226_to_fp16 = const()[name = string("op_4226_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017159808)))];
+            tensor<fp16, [1280]> var_4227_to_fp16 = const()[name = string("op_4227_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020436672)))];
+            tensor<fp16, [1, ?, 1280]> linear_154_cast_fp16 = linear(bias = var_4227_to_fp16, weight = var_4226_to_fp16, x = var_4207_cast_fp16)[name = string("linear_154_cast_fp16")];
+            tensor<int32, [3]> var_4229_shape_cast_fp16 = shape(x = linear_152_cast_fp16)[name = string("op_4229_shape_cast_fp16")];
+            int32 gather_230_axis_0 = const()[name = string("gather_230_axis_0"), val = int32(0)];
+            int32 gather_230_batch_dims_0 = const()[name = string("gather_230_batch_dims_0"), val = int32(0)];
+            bool gather_230_validate_indices_0 = const()[name = string("gather_230_validate_indices_0"), val = bool(false)];
+            string var_4229_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4229_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_230_to_uint16 = const()[name = string("select_230_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4229_shape_cast_fp16_to_uint16 = cast(dtype = var_4229_shape_cast_fp16_to_uint16_dtype_0, x = var_4229_shape_cast_fp16)[name = string("cast_352")];
+            uint16 gather_230_cast_uint16 = gather(axis = gather_230_axis_0, batch_dims = gather_230_batch_dims_0, indices = select_230_to_uint16, validate_indices = gather_230_validate_indices_0, x = var_4229_shape_cast_fp16_to_uint16)[name = string("gather_230_cast_uint16")];
+            string gather_230_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_230_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_230_cast_uint16_to_int32 = cast(dtype = gather_230_cast_uint16_to_int32_dtype_0, x = gather_230_cast_uint16)[name = string("cast_351")];
+            int32 end_step_41 = add(x = offset, y = gather_230_cast_uint16_to_int32)[name = string("end_step_41")];
+            tensor<int32, [1]> expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_307_axes_0 = const()[name = string("expand_dims_307_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_307 = expand_dims(axes = expand_dims_307_axes_0, x = end_step_41)[name = string("expand_dims_307")];
+            tensor<int32, [1]> concat_422_values0_0 = const()[name = string("concat_422_values0_0"), val = tensor<int32, [1]>([19])];
+            int32 concat_422_axis_0 = const()[name = string("concat_422_axis_0"), val = int32(0)];
+            bool concat_422_interleave_0 = const()[name = string("concat_422_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_422 = concat(axis = concat_422_axis_0, interleave = concat_422_interleave_0, values = (concat_422_values0_0, expand_dims_304, expand_dims_1, expand_dims_306))[name = string("concat_422")];
+            tensor<int32, [1]> concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_423_values1_0 = const()[name = string("concat_423_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_423_values3_0 = const()[name = string("concat_423_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)];
+            bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, concat_423_values1_0, expand_dims_307, concat_423_values3_0))[name = string("concat_423")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = k_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = k_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_20_stride_0, update = linear_153_cast_fp16, x = coreml_update_state_100)[name = string("k_cache1_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_20_cast_fp16, input = k_cache1)[name = string("coreml_update_state_102_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_102 = read_state(input = k_cache1)[name = string("coreml_update_state_102")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = v_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = v_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_20_stride_0, update = linear_154_cast_fp16, x = coreml_update_state_101)[name = string("v_cache1_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_20_cast_fp16, input = v_cache1)[name = string("coreml_update_state_103_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_103 = read_state(input = v_cache1)[name = string("coreml_update_state_103")];
+            int32 concat_428_values0_0 = const()[name = string("concat_428_values0_0"), val = int32(1)];
+            int32 concat_428_values2_0 = const()[name = string("concat_428_values2_0"), val = int32(1280)];
+            int32 concat_428_axis_0 = const()[name = string("concat_428_axis_0"), val = int32(0)];
+            bool concat_428_interleave_0 = const()[name = string("concat_428_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_428 = concat(axis = concat_428_axis_0, interleave = concat_428_interleave_0, values = (concat_428_values0_0, end_step_41, concat_428_values2_0))[name = string("concat_428")];
+            tensor<int32, [3]> var_4245_begin_0 = const()[name = string("op_4245_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4245_end_mask_0 = const()[name = string("op_4245_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4245_cast_fp16 = slice_by_index(begin = var_4245_begin_0, end = concat_428, end_mask = var_4245_end_mask_0, x = k_cache_77_cast_fp16)[name = string("op_4245_cast_fp16")];
+            tensor<int32, [3]> var_4248_begin_0 = const()[name = string("op_4248_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4248_end_mask_0 = const()[name = string("op_4248_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4248_cast_fp16 = slice_by_index(begin = var_4248_begin_0, end = concat_428, end_mask = var_4248_end_mask_0, x = v_cache_77_cast_fp16)[name = string("op_4248_cast_fp16")];
+            tensor<int32, [4]> concat_430x = const()[name = string("concat_430x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4258_cast_fp16 = reshape(shape = concat_430x, x = linear_152_cast_fp16)[name = string("op_4258_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_236_to_fp16 = const()[name = string("const_236_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_155_cast_fp16 = mul(x = var_4258_cast_fp16, y = const_236_to_fp16)[name = string("q_155_cast_fp16")];
+            tensor<int32, [4]> concat_431x = const()[name = string("concat_431x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4265_cast_fp16 = reshape(shape = concat_431x, x = var_4245_cast_fp16)[name = string("op_4265_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_195_cast_fp16 = mul(x = var_4265_cast_fp16, y = const_237_to_fp16)[name = string("k_195_cast_fp16")];
+            tensor<int32, [4]> concat_432x = const()[name = string("concat_432x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4272_cast_fp16 = reshape(shape = concat_432x, x = var_4248_cast_fp16)[name = string("op_4272_cast_fp16")];
+            tensor<int32, [4]> var_4273 = const()[name = string("op_4273"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_115_transpose_x_0 = const()[name = string("qk_115_transpose_x_0"), val = bool(false)];
+            bool qk_115_transpose_y_0 = const()[name = string("qk_115_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_333_perm_0 = const()[name = string("transpose_333_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_334_perm_0 = const()[name = string("transpose_334_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_334 = transpose(perm = transpose_334_perm_0, x = k_195_cast_fp16)[name = string("transpose_486")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_333 = transpose(perm = transpose_333_perm_0, x = q_155_cast_fp16)[name = string("transpose_487")];
+            tensor<fp16, [1, 20, ?, ?]> qk_115_cast_fp16 = matmul(transpose_x = qk_115_transpose_x_0, transpose_y = qk_115_transpose_y_0, x = transpose_333, y = transpose_334)[name = string("qk_115_cast_fp16")];
+            int32 concat_433_values1_0 = const()[name = string("concat_433_values1_0"), val = int32(448)];
+            int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)];
+            bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_230_cast_uint16_to_int32, concat_433_values1_0))[name = string("concat_433")];
+            tensor<int32, [2]> var_4276_begin_0 = const()[name = string("op_4276_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4276_end_mask_0 = const()[name = string("op_4276_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4276_cast_fp16 = slice_by_index(begin = var_4276_begin_0, end = concat_433, end_mask = var_4276_end_mask_0, x = mask_to_fp16)[name = string("op_4276_cast_fp16")];
+            int32 concat_434_values0_0 = const()[name = string("concat_434_values0_0"), val = int32(0)];
+            int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)];
+            bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (concat_434_values0_0, gather_230_cast_uint16_to_int32))[name = string("concat_434")];
+            tensor<int32, [2]> var_4277_begin_0 = const()[name = string("op_4277_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4277_end_mask_0 = const()[name = string("op_4277_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4277_cast_fp16 = slice_by_index(begin = var_4277_begin_0, end = concat_434, end_mask = var_4277_end_mask_0, x = var_4276_cast_fp16)[name = string("op_4277_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_117_cast_fp16 = add(x = qk_115_cast_fp16, y = var_4277_cast_fp16)[name = string("qk_117_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4280_cast_fp16 = softmax(axis = var_4189, x = qk_117_cast_fp16)[name = string("op_4280_cast_fp16")];
+            bool var_4282_transpose_x_0 = const()[name = string("op_4282_transpose_x_0"), val = bool(false)];
+            bool var_4282_transpose_y_0 = const()[name = string("op_4282_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_195_cast_fp16 = transpose(perm = var_4273, x = var_4272_cast_fp16)[name = string("transpose_488")];
+            tensor<fp16, [1, 20, ?, 64]> var_4282_cast_fp16 = matmul(transpose_x = var_4282_transpose_x_0, transpose_y = var_4282_transpose_y_0, x = var_4280_cast_fp16, y = v_195_cast_fp16)[name = string("op_4282_cast_fp16")];
+            tensor<int32, [4]> var_4283 = const()[name = string("op_4283"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_435x = const()[name = string("concat_435x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4284_cast_fp16 = transpose(perm = var_4283, x = var_4282_cast_fp16)[name = string("transpose_485")];
+            tensor<fp16, [1, ?, 1280]> x_349_cast_fp16 = reshape(shape = concat_435x, x = var_4284_cast_fp16)[name = string("x_349_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4288_to_fp16 = const()[name = string("op_4288_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020439296)))];
+            tensor<fp16, [1280]> var_4289_to_fp16 = const()[name = string("op_4289_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023716160)))];
+            tensor<fp16, [1, ?, 1280]> linear_155_cast_fp16 = linear(bias = var_4289_to_fp16, weight = var_4288_to_fp16, x = x_349_cast_fp16)[name = string("linear_155_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_351_cast_fp16 = add(x = x_345_cast_fp16, y = linear_155_cast_fp16)[name = string("x_351_cast_fp16")];
+            tensor<int32, [1]> var_4296_axes_0 = const()[name = string("op_4296_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023718784)))];
+            tensor<fp16, [1280]> blocks_19_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023721408)))];
+            tensor<fp16, [1, ?, 1280]> var_4296_cast_fp16 = layer_norm(axes = var_4296_axes_0, beta = blocks_19_cross_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_cross_attn_ln_weight_to_fp16, x = x_351_cast_fp16)[name = string("op_4296_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4305_to_fp16 = const()[name = string("op_4305_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023724032)))];
+            tensor<fp16, [1280]> var_4306_to_fp16 = const()[name = string("op_4306_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027000896)))];
+            tensor<fp16, [1, ?, 1280]> linear_156_cast_fp16 = linear(bias = var_4306_to_fp16, weight = var_4305_to_fp16, x = var_4296_cast_fp16)[name = string("linear_156_cast_fp16")];
+            tensor<int32, [3]> concat_436 = const()[name = string("concat_436"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_437 = const()[name = string("concat_437"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_197_internal_tensor_assign_1_stride_0 = const()[name = string("k_197_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_436, begin_mask = k_197_internal_tensor_assign_1_begin_mask_0, end = concat_437, end_mask = k_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_197_internal_tensor_assign_1_squeeze_mask_0, stride = k_197_internal_tensor_assign_1_stride_0, update = k_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("k_197_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_438 = const()[name = string("concat_438"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_439 = const()[name = string("concat_439"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_197_internal_tensor_assign_1_stride_0 = const()[name = string("v_197_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_438, begin_mask = v_197_internal_tensor_assign_1_begin_mask_0, end = concat_439, end_mask = v_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_197_internal_tensor_assign_1_squeeze_mask_0, stride = v_197_internal_tensor_assign_1_stride_0, update = v_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("v_197_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_440x = const()[name = string("concat_440x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4326_cast_fp16 = reshape(shape = concat_440x, x = linear_156_cast_fp16)[name = string("op_4326_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_238_to_fp16 = const()[name = string("const_238_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_159_cast_fp16 = mul(x = var_4326_cast_fp16, y = const_238_to_fp16)[name = string("q_159_cast_fp16")];
+            tensor<int32, [4]> var_4332 = const()[name = string("op_4332"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4333_cast_fp16 = reshape(shape = var_4332, x = k_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4333_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_239_to_fp16 = const()[name = string("const_239_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_199_cast_fp16 = mul(x = var_4333_cast_fp16, y = const_239_to_fp16)[name = string("k_199_cast_fp16")];
+            tensor<int32, [4]> var_4339 = const()[name = string("op_4339"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4340_cast_fp16 = reshape(shape = var_4339, x = v_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4340_cast_fp16")];
+            tensor<int32, [4]> var_4341 = const()[name = string("op_4341"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_119_transpose_x_0 = const()[name = string("qk_119_transpose_x_0"), val = bool(false)];
+            bool qk_119_transpose_y_0 = const()[name = string("qk_119_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_335_perm_0 = const()[name = string("transpose_335_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_336_perm_0 = const()[name = string("transpose_336_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_336 = transpose(perm = transpose_336_perm_0, x = k_199_cast_fp16)[name = string("transpose_482")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_335 = transpose(perm = transpose_335_perm_0, x = q_159_cast_fp16)[name = string("transpose_483")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_119_cast_fp16 = matmul(transpose_x = qk_119_transpose_x_0, transpose_y = qk_119_transpose_y_0, x = transpose_335, y = transpose_336)[name = string("qk_119_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4345_cast_fp16 = softmax(axis = var_4189, x = qk_119_cast_fp16)[name = string("op_4345_cast_fp16")];
+            bool var_4347_transpose_x_0 = const()[name = string("op_4347_transpose_x_0"), val = bool(false)];
+            bool var_4347_transpose_y_0 = const()[name = string("op_4347_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_199_cast_fp16 = transpose(perm = var_4341, x = var_4340_cast_fp16)[name = string("transpose_484")];
+            tensor<fp16, [1, 20, ?, 64]> var_4347_cast_fp16 = matmul(transpose_x = var_4347_transpose_x_0, transpose_y = var_4347_transpose_y_0, x = var_4345_cast_fp16, y = v_199_cast_fp16)[name = string("op_4347_cast_fp16")];
+            tensor<int32, [4]> var_4348 = const()[name = string("op_4348"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_441x = const()[name = string("concat_441x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4349_cast_fp16 = transpose(perm = var_4348, x = var_4347_cast_fp16)[name = string("transpose_481")];
+            tensor<fp16, [1, ?, 1280]> x_355_cast_fp16 = reshape(shape = concat_441x, x = var_4349_cast_fp16)[name = string("x_355_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027003520)))];
+            tensor<fp16, [1280]> var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030280384)))];
+            tensor<fp16, [1, ?, 1280]> linear_157_cast_fp16 = linear(bias = var_4354_to_fp16, weight = var_4353_to_fp16, x = x_355_cast_fp16)[name = string("linear_157_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_357_cast_fp16 = add(x = x_351_cast_fp16, y = linear_157_cast_fp16)[name = string("x_357_cast_fp16")];
+            tensor<int32, [1]> var_4361_axes_0 = const()[name = string("op_4361_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030283008)))];
+            tensor<fp16, [1280]> blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030285632)))];
+            tensor<fp16, [1, ?, 1280]> var_4361_cast_fp16 = layer_norm(axes = var_4361_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_357_cast_fp16)[name = string("op_4361_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_4370_to_fp16 = const()[name = string("op_4370_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030288256)))];
+            tensor<fp16, [5120]> var_4371_to_fp16 = const()[name = string("op_4371_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043395520)))];
+            tensor<fp16, [1, ?, 5120]> linear_158_cast_fp16 = linear(bias = var_4371_to_fp16, weight = var_4370_to_fp16, x = var_4361_cast_fp16)[name = string("linear_158_cast_fp16")];
+            string x_361_mode_0 = const()[name = string("x_361_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_361_cast_fp16 = gelu(mode = x_361_mode_0, x = linear_158_cast_fp16)[name = string("x_361_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_4376_to_fp16 = const()[name = string("op_4376_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043405824)))];
+            tensor<fp16, [1280]> var_4377_to_fp16 = const()[name = string("op_4377_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056513088)))];
+            tensor<fp16, [1, ?, 1280]> linear_159_cast_fp16 = linear(bias = var_4377_to_fp16, weight = var_4376_to_fp16, x = x_361_cast_fp16)[name = string("linear_159_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_159_cast_fp16)[name = string("x_363_cast_fp16")];
+            tensor<int32, [4]> k_cache_81_begin_0 = const()[name = string("k_cache_81_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_81_end_0 = const()[name = string("k_cache_81_end_0"), val = tensor<int32, [4]>([21, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_81_end_mask_0 = const()[name = string("k_cache_81_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_81_squeeze_mask_0 = const()[name = string("k_cache_81_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_81_cast_fp16 = slice_by_index(begin = k_cache_81_begin_0, end = k_cache_81_end_0, end_mask = k_cache_81_end_mask_0, squeeze_mask = k_cache_81_squeeze_mask_0, x = coreml_update_state_102)[name = string("k_cache_81_cast_fp16")];
+            tensor<int32, [4]> v_cache_81_begin_0 = const()[name = string("v_cache_81_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_81_end_0 = const()[name = string("v_cache_81_end_0"), val = tensor<int32, [4]>([21, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_81_end_mask_0 = const()[name = string("v_cache_81_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_81_squeeze_mask_0 = const()[name = string("v_cache_81_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_81_cast_fp16 = slice_by_index(begin = v_cache_81_begin_0, end = v_cache_81_end_0, end_mask = v_cache_81_end_mask_0, squeeze_mask = v_cache_81_squeeze_mask_0, x = coreml_update_state_103)[name = string("v_cache_81_cast_fp16")];
+            tensor<int32, [4]> k_cache_83_begin_0 = const()[name = string("k_cache_83_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_83_end_0 = const()[name = string("k_cache_83_end_0"), val = tensor<int32, [4]>([21, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_83_end_mask_0 = const()[name = string("k_cache_83_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_83_squeeze_mask_0 = const()[name = string("k_cache_83_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_83_cast_fp16 = slice_by_index(begin = k_cache_83_begin_0, end = k_cache_83_end_0, end_mask = k_cache_83_end_mask_0, squeeze_mask = k_cache_83_squeeze_mask_0, x = read_state_2)[name = string("k_cache_83_cast_fp16")];
+            tensor<int32, [4]> v_cache_83_begin_0 = const()[name = string("v_cache_83_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_83_end_0 = const()[name = string("v_cache_83_end_0"), val = tensor<int32, [4]>([21, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_83_end_mask_0 = const()[name = string("v_cache_83_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_83_squeeze_mask_0 = const()[name = string("v_cache_83_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_83_cast_fp16 = slice_by_index(begin = v_cache_83_begin_0, end = v_cache_83_end_0, end_mask = v_cache_83_end_mask_0, squeeze_mask = v_cache_83_squeeze_mask_0, x = read_state_3)[name = string("v_cache_83_cast_fp16")];
+            int32 var_4400 = const()[name = string("op_4400"), val = int32(-1)];
+            tensor<int32, [1]> var_4418_axes_0 = const()[name = string("op_4418_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056515712)))];
+            tensor<fp16, [1280]> blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056518336)))];
+            fp16 var_4406_to_fp16 = const()[name = string("op_4406_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_4418_cast_fp16 = layer_norm(axes = var_4418_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_363_cast_fp16)[name = string("op_4418_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4429_to_fp16 = const()[name = string("op_4429_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056520960)))];
+            tensor<fp16, [1280]> var_4430_to_fp16 = const()[name = string("op_4430_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059797824)))];
+            tensor<fp16, [1, ?, 1280]> linear_160_cast_fp16 = linear(bias = var_4430_to_fp16, weight = var_4429_to_fp16, x = var_4418_cast_fp16)[name = string("linear_160_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4433_to_fp16 = const()[name = string("op_4433_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059800448)))];
+            tensor<fp16, [1, ?, 1280]> linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4433_to_fp16, x = var_4418_cast_fp16)[name = string("linear_161_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4437_to_fp16 = const()[name = string("op_4437_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1063077312)))];
+            tensor<fp16, [1280]> var_4438_to_fp16 = const()[name = string("op_4438_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066354176)))];
+            tensor<fp16, [1, ?, 1280]> linear_162_cast_fp16 = linear(bias = var_4438_to_fp16, weight = var_4437_to_fp16, x = var_4418_cast_fp16)[name = string("linear_162_cast_fp16")];
+            tensor<int32, [3]> var_4440_shape_cast_fp16 = shape(x = linear_160_cast_fp16)[name = string("op_4440_shape_cast_fp16")];
+            int32 gather_242_axis_0 = const()[name = string("gather_242_axis_0"), val = int32(0)];
+            int32 gather_242_batch_dims_0 = const()[name = string("gather_242_batch_dims_0"), val = int32(0)];
+            bool gather_242_validate_indices_0 = const()[name = string("gather_242_validate_indices_0"), val = bool(false)];
+            string var_4440_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4440_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_242_to_uint16 = const()[name = string("select_242_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4440_shape_cast_fp16_to_uint16 = cast(dtype = var_4440_shape_cast_fp16_to_uint16_dtype_0, x = var_4440_shape_cast_fp16)[name = string("cast_350")];
+            uint16 gather_242_cast_uint16 = gather(axis = gather_242_axis_0, batch_dims = gather_242_batch_dims_0, indices = select_242_to_uint16, validate_indices = gather_242_validate_indices_0, x = var_4440_shape_cast_fp16_to_uint16)[name = string("gather_242_cast_uint16")];
+            string gather_242_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_242_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_242_cast_uint16_to_int32 = cast(dtype = gather_242_cast_uint16_to_int32_dtype_0, x = gather_242_cast_uint16)[name = string("cast_349")];
+            int32 end_step_43 = add(x = offset, y = gather_242_cast_uint16_to_int32)[name = string("end_step_43")];
+            tensor<int32, [1]> expand_dims_320 = const()[name = string("expand_dims_320"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_323_axes_0 = const()[name = string("expand_dims_323_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_323 = expand_dims(axes = expand_dims_323_axes_0, x = end_step_43)[name = string("expand_dims_323")];
+            tensor<int32, [1]> concat_444_values0_0 = const()[name = string("concat_444_values0_0"), val = tensor<int32, [1]>([20])];
+            int32 concat_444_axis_0 = const()[name = string("concat_444_axis_0"), val = int32(0)];
+            bool concat_444_interleave_0 = const()[name = string("concat_444_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_444 = concat(axis = concat_444_axis_0, interleave = concat_444_interleave_0, values = (concat_444_values0_0, expand_dims_320, expand_dims_1, expand_dims_322))[name = string("concat_444")];
+            tensor<int32, [1]> concat_445_values0_0 = const()[name = string("concat_445_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_445_values1_0 = const()[name = string("concat_445_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_445_values3_0 = const()[name = string("concat_445_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_445_axis_0 = const()[name = string("concat_445_axis_0"), val = int32(0)];
+            bool concat_445_interleave_0 = const()[name = string("concat_445_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_445 = concat(axis = concat_445_axis_0, interleave = concat_445_interleave_0, values = (concat_445_values0_0, concat_445_values1_0, expand_dims_323, concat_445_values3_0))[name = string("concat_445")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = k_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = k_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_21_stride_0, update = linear_161_cast_fp16, x = coreml_update_state_102)[name = string("k_cache1_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_21_cast_fp16, input = k_cache1)[name = string("coreml_update_state_104_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_104 = read_state(input = k_cache1)[name = string("coreml_update_state_104")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = v_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = v_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_21_stride_0, update = linear_162_cast_fp16, x = coreml_update_state_103)[name = string("v_cache1_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_21_cast_fp16, input = v_cache1)[name = string("coreml_update_state_105_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_105 = read_state(input = v_cache1)[name = string("coreml_update_state_105")];
+            int32 concat_450_values0_0 = const()[name = string("concat_450_values0_0"), val = int32(1)];
+            int32 concat_450_values2_0 = const()[name = string("concat_450_values2_0"), val = int32(1280)];
+            int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)];
+            bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (concat_450_values0_0, end_step_43, concat_450_values2_0))[name = string("concat_450")];
+            tensor<int32, [3]> var_4456_begin_0 = const()[name = string("op_4456_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4456_end_mask_0 = const()[name = string("op_4456_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4456_cast_fp16 = slice_by_index(begin = var_4456_begin_0, end = concat_450, end_mask = var_4456_end_mask_0, x = k_cache_81_cast_fp16)[name = string("op_4456_cast_fp16")];
+            tensor<int32, [3]> var_4459_begin_0 = const()[name = string("op_4459_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4459_end_mask_0 = const()[name = string("op_4459_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4459_cast_fp16 = slice_by_index(begin = var_4459_begin_0, end = concat_450, end_mask = var_4459_end_mask_0, x = v_cache_81_cast_fp16)[name = string("op_4459_cast_fp16")];
+            tensor<int32, [4]> concat_452x = const()[name = string("concat_452x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4469_cast_fp16 = reshape(shape = concat_452x, x = linear_160_cast_fp16)[name = string("op_4469_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_163_cast_fp16 = mul(x = var_4469_cast_fp16, y = const_240_to_fp16)[name = string("q_163_cast_fp16")];
+            tensor<int32, [4]> concat_453x = const()[name = string("concat_453x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4476_cast_fp16 = reshape(shape = concat_453x, x = var_4456_cast_fp16)[name = string("op_4476_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_205_cast_fp16 = mul(x = var_4476_cast_fp16, y = const_241_to_fp16)[name = string("k_205_cast_fp16")];
+            tensor<int32, [4]> concat_454x = const()[name = string("concat_454x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4483_cast_fp16 = reshape(shape = concat_454x, x = var_4459_cast_fp16)[name = string("op_4483_cast_fp16")];
+            tensor<int32, [4]> var_4484 = const()[name = string("op_4484"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_121_transpose_x_0 = const()[name = string("qk_121_transpose_x_0"), val = bool(false)];
+            bool qk_121_transpose_y_0 = const()[name = string("qk_121_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_337_perm_0 = const()[name = string("transpose_337_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_338_perm_0 = const()[name = string("transpose_338_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_338 = transpose(perm = transpose_338_perm_0, x = k_205_cast_fp16)[name = string("transpose_478")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_337 = transpose(perm = transpose_337_perm_0, x = q_163_cast_fp16)[name = string("transpose_479")];
+            tensor<fp16, [1, 20, ?, ?]> qk_121_cast_fp16 = matmul(transpose_x = qk_121_transpose_x_0, transpose_y = qk_121_transpose_y_0, x = transpose_337, y = transpose_338)[name = string("qk_121_cast_fp16")];
+            int32 concat_455_values1_0 = const()[name = string("concat_455_values1_0"), val = int32(448)];
+            int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)];
+            bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_242_cast_uint16_to_int32, concat_455_values1_0))[name = string("concat_455")];
+            tensor<int32, [2]> var_4487_begin_0 = const()[name = string("op_4487_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4487_end_mask_0 = const()[name = string("op_4487_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4487_cast_fp16 = slice_by_index(begin = var_4487_begin_0, end = concat_455, end_mask = var_4487_end_mask_0, x = mask_to_fp16)[name = string("op_4487_cast_fp16")];
+            int32 concat_456_values0_0 = const()[name = string("concat_456_values0_0"), val = int32(0)];
+            int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)];
+            bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (concat_456_values0_0, gather_242_cast_uint16_to_int32))[name = string("concat_456")];
+            tensor<int32, [2]> var_4488_begin_0 = const()[name = string("op_4488_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4488_end_mask_0 = const()[name = string("op_4488_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4488_cast_fp16 = slice_by_index(begin = var_4488_begin_0, end = concat_456, end_mask = var_4488_end_mask_0, x = var_4487_cast_fp16)[name = string("op_4488_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_123_cast_fp16 = add(x = qk_121_cast_fp16, y = var_4488_cast_fp16)[name = string("qk_123_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4491_cast_fp16 = softmax(axis = var_4400, x = qk_123_cast_fp16)[name = string("op_4491_cast_fp16")];
+            bool var_4493_transpose_x_0 = const()[name = string("op_4493_transpose_x_0"), val = bool(false)];
+            bool var_4493_transpose_y_0 = const()[name = string("op_4493_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_205_cast_fp16 = transpose(perm = var_4484, x = var_4483_cast_fp16)[name = string("transpose_480")];
+            tensor<fp16, [1, 20, ?, 64]> var_4493_cast_fp16 = matmul(transpose_x = var_4493_transpose_x_0, transpose_y = var_4493_transpose_y_0, x = var_4491_cast_fp16, y = v_205_cast_fp16)[name = string("op_4493_cast_fp16")];
+            tensor<int32, [4]> var_4494 = const()[name = string("op_4494"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_457x = const()[name = string("concat_457x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4495_cast_fp16 = transpose(perm = var_4494, x = var_4493_cast_fp16)[name = string("transpose_477")];
+            tensor<fp16, [1, ?, 1280]> x_367_cast_fp16 = reshape(shape = concat_457x, x = var_4495_cast_fp16)[name = string("x_367_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4499_to_fp16 = const()[name = string("op_4499_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066356800)))];
+            tensor<fp16, [1280]> var_4500_to_fp16 = const()[name = string("op_4500_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069633664)))];
+            tensor<fp16, [1, ?, 1280]> linear_163_cast_fp16 = linear(bias = var_4500_to_fp16, weight = var_4499_to_fp16, x = x_367_cast_fp16)[name = string("linear_163_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_369_cast_fp16 = add(x = x_363_cast_fp16, y = linear_163_cast_fp16)[name = string("x_369_cast_fp16")];
+            tensor<int32, [1]> var_4507_axes_0 = const()[name = string("op_4507_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069636288)))];
+            tensor<fp16, [1280]> blocks_20_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069638912)))];
+            tensor<fp16, [1, ?, 1280]> var_4507_cast_fp16 = layer_norm(axes = var_4507_axes_0, beta = blocks_20_cross_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_cross_attn_ln_weight_to_fp16, x = x_369_cast_fp16)[name = string("op_4507_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4516_to_fp16 = const()[name = string("op_4516_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069641536)))];
+            tensor<fp16, [1280]> var_4517_to_fp16 = const()[name = string("op_4517_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072918400)))];
+            tensor<fp16, [1, ?, 1280]> linear_164_cast_fp16 = linear(bias = var_4517_to_fp16, weight = var_4516_to_fp16, x = var_4507_cast_fp16)[name = string("linear_164_cast_fp16")];
+            tensor<int32, [3]> concat_458 = const()[name = string("concat_458"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_459 = const()[name = string("concat_459"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_207_internal_tensor_assign_1_stride_0 = const()[name = string("k_207_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_458, begin_mask = k_207_internal_tensor_assign_1_begin_mask_0, end = concat_459, end_mask = k_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_207_internal_tensor_assign_1_squeeze_mask_0, stride = k_207_internal_tensor_assign_1_stride_0, update = k_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("k_207_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_460 = const()[name = string("concat_460"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_461 = const()[name = string("concat_461"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_207_internal_tensor_assign_1_stride_0 = const()[name = string("v_207_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_460, begin_mask = v_207_internal_tensor_assign_1_begin_mask_0, end = concat_461, end_mask = v_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_207_internal_tensor_assign_1_squeeze_mask_0, stride = v_207_internal_tensor_assign_1_stride_0, update = v_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("v_207_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_462x = const()[name = string("concat_462x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4537_cast_fp16 = reshape(shape = concat_462x, x = linear_164_cast_fp16)[name = string("op_4537_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_167_cast_fp16 = mul(x = var_4537_cast_fp16, y = const_242_to_fp16)[name = string("q_167_cast_fp16")];
+            tensor<int32, [4]> var_4543 = const()[name = string("op_4543"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4544_cast_fp16 = reshape(shape = var_4543, x = k_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4544_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_209_cast_fp16 = mul(x = var_4544_cast_fp16, y = const_243_to_fp16)[name = string("k_209_cast_fp16")];
+            tensor<int32, [4]> var_4550 = const()[name = string("op_4550"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4551_cast_fp16 = reshape(shape = var_4550, x = v_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4551_cast_fp16")];
+            tensor<int32, [4]> var_4552 = const()[name = string("op_4552"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_125_transpose_x_0 = const()[name = string("qk_125_transpose_x_0"), val = bool(false)];
+            bool qk_125_transpose_y_0 = const()[name = string("qk_125_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_339_perm_0 = const()[name = string("transpose_339_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_340_perm_0 = const()[name = string("transpose_340_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_340 = transpose(perm = transpose_340_perm_0, x = k_209_cast_fp16)[name = string("transpose_474")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_339 = transpose(perm = transpose_339_perm_0, x = q_167_cast_fp16)[name = string("transpose_475")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_125_cast_fp16 = matmul(transpose_x = qk_125_transpose_x_0, transpose_y = qk_125_transpose_y_0, x = transpose_339, y = transpose_340)[name = string("qk_125_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4556_cast_fp16 = softmax(axis = var_4400, x = qk_125_cast_fp16)[name = string("op_4556_cast_fp16")];
+            bool var_4558_transpose_x_0 = const()[name = string("op_4558_transpose_x_0"), val = bool(false)];
+            bool var_4558_transpose_y_0 = const()[name = string("op_4558_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_209_cast_fp16 = transpose(perm = var_4552, x = var_4551_cast_fp16)[name = string("transpose_476")];
+            tensor<fp16, [1, 20, ?, 64]> var_4558_cast_fp16 = matmul(transpose_x = var_4558_transpose_x_0, transpose_y = var_4558_transpose_y_0, x = var_4556_cast_fp16, y = v_209_cast_fp16)[name = string("op_4558_cast_fp16")];
+            tensor<int32, [4]> var_4559 = const()[name = string("op_4559"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_463x = const()[name = string("concat_463x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4560_cast_fp16 = transpose(perm = var_4559, x = var_4558_cast_fp16)[name = string("transpose_473")];
+            tensor<fp16, [1, ?, 1280]> x_373_cast_fp16 = reshape(shape = concat_463x, x = var_4560_cast_fp16)[name = string("x_373_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4564_to_fp16 = const()[name = string("op_4564_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072921024)))];
+            tensor<fp16, [1280]> var_4565_to_fp16 = const()[name = string("op_4565_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076197888)))];
+            tensor<fp16, [1, ?, 1280]> linear_165_cast_fp16 = linear(bias = var_4565_to_fp16, weight = var_4564_to_fp16, x = x_373_cast_fp16)[name = string("linear_165_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_375_cast_fp16 = add(x = x_369_cast_fp16, y = linear_165_cast_fp16)[name = string("x_375_cast_fp16")];
+            tensor<int32, [1]> var_4572_axes_0 = const()[name = string("op_4572_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076200512)))];
+            tensor<fp16, [1280]> blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076203136)))];
+            tensor<fp16, [1, ?, 1280]> var_4572_cast_fp16 = layer_norm(axes = var_4572_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_375_cast_fp16)[name = string("op_4572_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_4581_to_fp16 = const()[name = string("op_4581_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076205760)))];
+            tensor<fp16, [5120]> var_4582_to_fp16 = const()[name = string("op_4582_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089313024)))];
+            tensor<fp16, [1, ?, 5120]> linear_166_cast_fp16 = linear(bias = var_4582_to_fp16, weight = var_4581_to_fp16, x = var_4572_cast_fp16)[name = string("linear_166_cast_fp16")];
+            string x_379_mode_0 = const()[name = string("x_379_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_379_cast_fp16 = gelu(mode = x_379_mode_0, x = linear_166_cast_fp16)[name = string("x_379_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_4587_to_fp16 = const()[name = string("op_4587_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089323328)))];
+            tensor<fp16, [1280]> var_4588_to_fp16 = const()[name = string("op_4588_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102430592)))];
+            tensor<fp16, [1, ?, 1280]> linear_167_cast_fp16 = linear(bias = var_4588_to_fp16, weight = var_4587_to_fp16, x = x_379_cast_fp16)[name = string("linear_167_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_381_cast_fp16 = add(x = x_375_cast_fp16, y = linear_167_cast_fp16)[name = string("x_381_cast_fp16")];
+            tensor<int32, [4]> k_cache_85_begin_0 = const()[name = string("k_cache_85_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_85_end_0 = const()[name = string("k_cache_85_end_0"), val = tensor<int32, [4]>([22, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_85_end_mask_0 = const()[name = string("k_cache_85_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_85_squeeze_mask_0 = const()[name = string("k_cache_85_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_85_cast_fp16 = slice_by_index(begin = k_cache_85_begin_0, end = k_cache_85_end_0, end_mask = k_cache_85_end_mask_0, squeeze_mask = k_cache_85_squeeze_mask_0, x = coreml_update_state_104)[name = string("k_cache_85_cast_fp16")];
+            tensor<int32, [4]> v_cache_85_begin_0 = const()[name = string("v_cache_85_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_85_end_0 = const()[name = string("v_cache_85_end_0"), val = tensor<int32, [4]>([22, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_85_end_mask_0 = const()[name = string("v_cache_85_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_85_squeeze_mask_0 = const()[name = string("v_cache_85_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_85_cast_fp16 = slice_by_index(begin = v_cache_85_begin_0, end = v_cache_85_end_0, end_mask = v_cache_85_end_mask_0, squeeze_mask = v_cache_85_squeeze_mask_0, x = coreml_update_state_105)[name = string("v_cache_85_cast_fp16")];
+            tensor<int32, [4]> k_cache_87_begin_0 = const()[name = string("k_cache_87_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_87_end_0 = const()[name = string("k_cache_87_end_0"), val = tensor<int32, [4]>([22, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_87_end_mask_0 = const()[name = string("k_cache_87_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_87_squeeze_mask_0 = const()[name = string("k_cache_87_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_87_cast_fp16 = slice_by_index(begin = k_cache_87_begin_0, end = k_cache_87_end_0, end_mask = k_cache_87_end_mask_0, squeeze_mask = k_cache_87_squeeze_mask_0, x = read_state_2)[name = string("k_cache_87_cast_fp16")];
+            tensor<int32, [4]> v_cache_87_begin_0 = const()[name = string("v_cache_87_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_87_end_0 = const()[name = string("v_cache_87_end_0"), val = tensor<int32, [4]>([22, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_87_end_mask_0 = const()[name = string("v_cache_87_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_87_squeeze_mask_0 = const()[name = string("v_cache_87_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_87_cast_fp16 = slice_by_index(begin = v_cache_87_begin_0, end = v_cache_87_end_0, end_mask = v_cache_87_end_mask_0, squeeze_mask = v_cache_87_squeeze_mask_0, x = read_state_3)[name = string("v_cache_87_cast_fp16")];
+            int32 var_4611 = const()[name = string("op_4611"), val = int32(-1)];
+            tensor<int32, [1]> var_4629_axes_0 = const()[name = string("op_4629_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102433216)))];
+            tensor<fp16, [1280]> blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102435840)))];
+            fp16 var_4617_to_fp16 = const()[name = string("op_4617_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_4629_cast_fp16 = layer_norm(axes = var_4629_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_381_cast_fp16)[name = string("op_4629_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4640_to_fp16 = const()[name = string("op_4640_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102438464)))];
+            tensor<fp16, [1280]> var_4641_to_fp16 = const()[name = string("op_4641_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105715328)))];
+            tensor<fp16, [1, ?, 1280]> linear_168_cast_fp16 = linear(bias = var_4641_to_fp16, weight = var_4640_to_fp16, x = var_4629_cast_fp16)[name = string("linear_168_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4644_to_fp16 = const()[name = string("op_4644_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105717952)))];
+            tensor<fp16, [1, ?, 1280]> linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4644_to_fp16, x = var_4629_cast_fp16)[name = string("linear_169_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4648_to_fp16 = const()[name = string("op_4648_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1108994816)))];
+            tensor<fp16, [1280]> var_4649_to_fp16 = const()[name = string("op_4649_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112271680)))];
+            tensor<fp16, [1, ?, 1280]> linear_170_cast_fp16 = linear(bias = var_4649_to_fp16, weight = var_4648_to_fp16, x = var_4629_cast_fp16)[name = string("linear_170_cast_fp16")];
+            tensor<int32, [3]> var_4651_shape_cast_fp16 = shape(x = linear_168_cast_fp16)[name = string("op_4651_shape_cast_fp16")];
+            int32 gather_254_axis_0 = const()[name = string("gather_254_axis_0"), val = int32(0)];
+            int32 gather_254_batch_dims_0 = const()[name = string("gather_254_batch_dims_0"), val = int32(0)];
+            bool gather_254_validate_indices_0 = const()[name = string("gather_254_validate_indices_0"), val = bool(false)];
+            string var_4651_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4651_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_254_to_uint16 = const()[name = string("select_254_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4651_shape_cast_fp16_to_uint16 = cast(dtype = var_4651_shape_cast_fp16_to_uint16_dtype_0, x = var_4651_shape_cast_fp16)[name = string("cast_348")];
+            uint16 gather_254_cast_uint16 = gather(axis = gather_254_axis_0, batch_dims = gather_254_batch_dims_0, indices = select_254_to_uint16, validate_indices = gather_254_validate_indices_0, x = var_4651_shape_cast_fp16_to_uint16)[name = string("gather_254_cast_uint16")];
+            string gather_254_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_254_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_254_cast_uint16_to_int32 = cast(dtype = gather_254_cast_uint16_to_int32_dtype_0, x = gather_254_cast_uint16)[name = string("cast_347")];
+            int32 end_step_45 = add(x = offset, y = gather_254_cast_uint16_to_int32)[name = string("end_step_45")];
+            tensor<int32, [1]> expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_338 = const()[name = string("expand_dims_338"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_339_axes_0 = const()[name = string("expand_dims_339_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_339 = expand_dims(axes = expand_dims_339_axes_0, x = end_step_45)[name = string("expand_dims_339")];
+            tensor<int32, [1]> concat_466_values0_0 = const()[name = string("concat_466_values0_0"), val = tensor<int32, [1]>([21])];
+            int32 concat_466_axis_0 = const()[name = string("concat_466_axis_0"), val = int32(0)];
+            bool concat_466_interleave_0 = const()[name = string("concat_466_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_466 = concat(axis = concat_466_axis_0, interleave = concat_466_interleave_0, values = (concat_466_values0_0, expand_dims_336, expand_dims_1, expand_dims_338))[name = string("concat_466")];
+            tensor<int32, [1]> concat_467_values0_0 = const()[name = string("concat_467_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_467_values1_0 = const()[name = string("concat_467_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_467_values3_0 = const()[name = string("concat_467_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_467_axis_0 = const()[name = string("concat_467_axis_0"), val = int32(0)];
+            bool concat_467_interleave_0 = const()[name = string("concat_467_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_467 = concat(axis = concat_467_axis_0, interleave = concat_467_interleave_0, values = (concat_467_values0_0, concat_467_values1_0, expand_dims_339, concat_467_values3_0))[name = string("concat_467")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = k_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = k_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_22_stride_0, update = linear_169_cast_fp16, x = coreml_update_state_104)[name = string("k_cache1_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_22_cast_fp16, input = k_cache1)[name = string("coreml_update_state_106_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_106 = read_state(input = k_cache1)[name = string("coreml_update_state_106")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = v_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = v_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_22_stride_0, update = linear_170_cast_fp16, x = coreml_update_state_105)[name = string("v_cache1_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_22_cast_fp16, input = v_cache1)[name = string("coreml_update_state_107_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_107 = read_state(input = v_cache1)[name = string("coreml_update_state_107")];
+            int32 concat_472_values0_0 = const()[name = string("concat_472_values0_0"), val = int32(1)];
+            int32 concat_472_values2_0 = const()[name = string("concat_472_values2_0"), val = int32(1280)];
+            int32 concat_472_axis_0 = const()[name = string("concat_472_axis_0"), val = int32(0)];
+            bool concat_472_interleave_0 = const()[name = string("concat_472_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_472 = concat(axis = concat_472_axis_0, interleave = concat_472_interleave_0, values = (concat_472_values0_0, end_step_45, concat_472_values2_0))[name = string("concat_472")];
+            tensor<int32, [3]> var_4667_begin_0 = const()[name = string("op_4667_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4667_end_mask_0 = const()[name = string("op_4667_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4667_cast_fp16 = slice_by_index(begin = var_4667_begin_0, end = concat_472, end_mask = var_4667_end_mask_0, x = k_cache_85_cast_fp16)[name = string("op_4667_cast_fp16")];
+            tensor<int32, [3]> var_4670_begin_0 = const()[name = string("op_4670_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4670_end_mask_0 = const()[name = string("op_4670_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4670_cast_fp16 = slice_by_index(begin = var_4670_begin_0, end = concat_472, end_mask = var_4670_end_mask_0, x = v_cache_85_cast_fp16)[name = string("op_4670_cast_fp16")];
+            tensor<int32, [4]> concat_474x = const()[name = string("concat_474x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4680_cast_fp16 = reshape(shape = concat_474x, x = linear_168_cast_fp16)[name = string("op_4680_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_171_cast_fp16 = mul(x = var_4680_cast_fp16, y = const_244_to_fp16)[name = string("q_171_cast_fp16")];
+            tensor<int32, [4]> concat_475x = const()[name = string("concat_475x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4687_cast_fp16 = reshape(shape = concat_475x, x = var_4667_cast_fp16)[name = string("op_4687_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_215_cast_fp16 = mul(x = var_4687_cast_fp16, y = const_245_to_fp16)[name = string("k_215_cast_fp16")];
+            tensor<int32, [4]> concat_476x = const()[name = string("concat_476x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4694_cast_fp16 = reshape(shape = concat_476x, x = var_4670_cast_fp16)[name = string("op_4694_cast_fp16")];
+            tensor<int32, [4]> var_4695 = const()[name = string("op_4695"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_127_transpose_x_0 = const()[name = string("qk_127_transpose_x_0"), val = bool(false)];
+            bool qk_127_transpose_y_0 = const()[name = string("qk_127_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_341_perm_0 = const()[name = string("transpose_341_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_342_perm_0 = const()[name = string("transpose_342_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_342 = transpose(perm = transpose_342_perm_0, x = k_215_cast_fp16)[name = string("transpose_470")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_341 = transpose(perm = transpose_341_perm_0, x = q_171_cast_fp16)[name = string("transpose_471")];
+            tensor<fp16, [1, 20, ?, ?]> qk_127_cast_fp16 = matmul(transpose_x = qk_127_transpose_x_0, transpose_y = qk_127_transpose_y_0, x = transpose_341, y = transpose_342)[name = string("qk_127_cast_fp16")];
+            int32 concat_477_values1_0 = const()[name = string("concat_477_values1_0"), val = int32(448)];
+            int32 concat_477_axis_0 = const()[name = string("concat_477_axis_0"), val = int32(0)];
+            bool concat_477_interleave_0 = const()[name = string("concat_477_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_477 = concat(axis = concat_477_axis_0, interleave = concat_477_interleave_0, values = (gather_254_cast_uint16_to_int32, concat_477_values1_0))[name = string("concat_477")];
+            tensor<int32, [2]> var_4698_begin_0 = const()[name = string("op_4698_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4698_end_mask_0 = const()[name = string("op_4698_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4698_cast_fp16 = slice_by_index(begin = var_4698_begin_0, end = concat_477, end_mask = var_4698_end_mask_0, x = mask_to_fp16)[name = string("op_4698_cast_fp16")];
+            int32 concat_478_values0_0 = const()[name = string("concat_478_values0_0"), val = int32(0)];
+            int32 concat_478_axis_0 = const()[name = string("concat_478_axis_0"), val = int32(0)];
+            bool concat_478_interleave_0 = const()[name = string("concat_478_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_478 = concat(axis = concat_478_axis_0, interleave = concat_478_interleave_0, values = (concat_478_values0_0, gather_254_cast_uint16_to_int32))[name = string("concat_478")];
+            tensor<int32, [2]> var_4699_begin_0 = const()[name = string("op_4699_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4699_end_mask_0 = const()[name = string("op_4699_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4699_cast_fp16 = slice_by_index(begin = var_4699_begin_0, end = concat_478, end_mask = var_4699_end_mask_0, x = var_4698_cast_fp16)[name = string("op_4699_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_129_cast_fp16 = add(x = qk_127_cast_fp16, y = var_4699_cast_fp16)[name = string("qk_129_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4702_cast_fp16 = softmax(axis = var_4611, x = qk_129_cast_fp16)[name = string("op_4702_cast_fp16")];
+            bool var_4704_transpose_x_0 = const()[name = string("op_4704_transpose_x_0"), val = bool(false)];
+            bool var_4704_transpose_y_0 = const()[name = string("op_4704_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_215_cast_fp16 = transpose(perm = var_4695, x = var_4694_cast_fp16)[name = string("transpose_472")];
+            tensor<fp16, [1, 20, ?, 64]> var_4704_cast_fp16 = matmul(transpose_x = var_4704_transpose_x_0, transpose_y = var_4704_transpose_y_0, x = var_4702_cast_fp16, y = v_215_cast_fp16)[name = string("op_4704_cast_fp16")];
+            tensor<int32, [4]> var_4705 = const()[name = string("op_4705"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_479x = const()[name = string("concat_479x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4706_cast_fp16 = transpose(perm = var_4705, x = var_4704_cast_fp16)[name = string("transpose_469")];
+            tensor<fp16, [1, ?, 1280]> x_385_cast_fp16 = reshape(shape = concat_479x, x = var_4706_cast_fp16)[name = string("x_385_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4710_to_fp16 = const()[name = string("op_4710_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112274304)))];
+            tensor<fp16, [1280]> var_4711_to_fp16 = const()[name = string("op_4711_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115551168)))];
+            tensor<fp16, [1, ?, 1280]> linear_171_cast_fp16 = linear(bias = var_4711_to_fp16, weight = var_4710_to_fp16, x = x_385_cast_fp16)[name = string("linear_171_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_387_cast_fp16 = add(x = x_381_cast_fp16, y = linear_171_cast_fp16)[name = string("x_387_cast_fp16")];
+            tensor<int32, [1]> var_4718_axes_0 = const()[name = string("op_4718_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115553792)))];
+            tensor<fp16, [1280]> blocks_21_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115556416)))];
+            tensor<fp16, [1, ?, 1280]> var_4718_cast_fp16 = layer_norm(axes = var_4718_axes_0, beta = blocks_21_cross_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_cross_attn_ln_weight_to_fp16, x = x_387_cast_fp16)[name = string("op_4718_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4727_to_fp16 = const()[name = string("op_4727_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115559040)))];
+            tensor<fp16, [1280]> var_4728_to_fp16 = const()[name = string("op_4728_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118835904)))];
+            tensor<fp16, [1, ?, 1280]> linear_172_cast_fp16 = linear(bias = var_4728_to_fp16, weight = var_4727_to_fp16, x = var_4718_cast_fp16)[name = string("linear_172_cast_fp16")];
+            tensor<int32, [3]> concat_480 = const()[name = string("concat_480"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_481 = const()[name = string("concat_481"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_217_internal_tensor_assign_1_stride_0 = const()[name = string("k_217_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_480, begin_mask = k_217_internal_tensor_assign_1_begin_mask_0, end = concat_481, end_mask = k_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_217_internal_tensor_assign_1_squeeze_mask_0, stride = k_217_internal_tensor_assign_1_stride_0, update = k_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("k_217_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_482 = const()[name = string("concat_482"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_483 = const()[name = string("concat_483"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_217_internal_tensor_assign_1_stride_0 = const()[name = string("v_217_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_482, begin_mask = v_217_internal_tensor_assign_1_begin_mask_0, end = concat_483, end_mask = v_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_217_internal_tensor_assign_1_squeeze_mask_0, stride = v_217_internal_tensor_assign_1_stride_0, update = v_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("v_217_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_484x = const()[name = string("concat_484x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4748_cast_fp16 = reshape(shape = concat_484x, x = linear_172_cast_fp16)[name = string("op_4748_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_175_cast_fp16 = mul(x = var_4748_cast_fp16, y = const_246_to_fp16)[name = string("q_175_cast_fp16")];
+            tensor<int32, [4]> var_4754 = const()[name = string("op_4754"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4755_cast_fp16 = reshape(shape = var_4754, x = k_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4755_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_219_cast_fp16 = mul(x = var_4755_cast_fp16, y = const_247_to_fp16)[name = string("k_219_cast_fp16")];
+            tensor<int32, [4]> var_4761 = const()[name = string("op_4761"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4762_cast_fp16 = reshape(shape = var_4761, x = v_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4762_cast_fp16")];
+            tensor<int32, [4]> var_4763 = const()[name = string("op_4763"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_131_transpose_x_0 = const()[name = string("qk_131_transpose_x_0"), val = bool(false)];
+            bool qk_131_transpose_y_0 = const()[name = string("qk_131_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_343_perm_0 = const()[name = string("transpose_343_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_344_perm_0 = const()[name = string("transpose_344_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_344 = transpose(perm = transpose_344_perm_0, x = k_219_cast_fp16)[name = string("transpose_466")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_343 = transpose(perm = transpose_343_perm_0, x = q_175_cast_fp16)[name = string("transpose_467")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_131_cast_fp16 = matmul(transpose_x = qk_131_transpose_x_0, transpose_y = qk_131_transpose_y_0, x = transpose_343, y = transpose_344)[name = string("qk_131_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4767_cast_fp16 = softmax(axis = var_4611, x = qk_131_cast_fp16)[name = string("op_4767_cast_fp16")];
+            bool var_4769_transpose_x_0 = const()[name = string("op_4769_transpose_x_0"), val = bool(false)];
+            bool var_4769_transpose_y_0 = const()[name = string("op_4769_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_219_cast_fp16 = transpose(perm = var_4763, x = var_4762_cast_fp16)[name = string("transpose_468")];
+            tensor<fp16, [1, 20, ?, 64]> var_4769_cast_fp16 = matmul(transpose_x = var_4769_transpose_x_0, transpose_y = var_4769_transpose_y_0, x = var_4767_cast_fp16, y = v_219_cast_fp16)[name = string("op_4769_cast_fp16")];
+            tensor<int32, [4]> var_4770 = const()[name = string("op_4770"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_485x = const()[name = string("concat_485x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4771_cast_fp16 = transpose(perm = var_4770, x = var_4769_cast_fp16)[name = string("transpose_465")];
+            tensor<fp16, [1, ?, 1280]> x_391_cast_fp16 = reshape(shape = concat_485x, x = var_4771_cast_fp16)[name = string("x_391_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4775_to_fp16 = const()[name = string("op_4775_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118838528)))];
+            tensor<fp16, [1280]> var_4776_to_fp16 = const()[name = string("op_4776_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122115392)))];
+            tensor<fp16, [1, ?, 1280]> linear_173_cast_fp16 = linear(bias = var_4776_to_fp16, weight = var_4775_to_fp16, x = x_391_cast_fp16)[name = string("linear_173_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_393_cast_fp16 = add(x = x_387_cast_fp16, y = linear_173_cast_fp16)[name = string("x_393_cast_fp16")];
+            tensor<int32, [1]> var_4783_axes_0 = const()[name = string("op_4783_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122118016)))];
+            tensor<fp16, [1280]> blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122120640)))];
+            tensor<fp16, [1, ?, 1280]> var_4783_cast_fp16 = layer_norm(axes = var_4783_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_393_cast_fp16)[name = string("op_4783_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_4792_to_fp16 = const()[name = string("op_4792_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122123264)))];
+            tensor<fp16, [5120]> var_4793_to_fp16 = const()[name = string("op_4793_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135230528)))];
+            tensor<fp16, [1, ?, 5120]> linear_174_cast_fp16 = linear(bias = var_4793_to_fp16, weight = var_4792_to_fp16, x = var_4783_cast_fp16)[name = string("linear_174_cast_fp16")];
+            string x_397_mode_0 = const()[name = string("x_397_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_397_cast_fp16 = gelu(mode = x_397_mode_0, x = linear_174_cast_fp16)[name = string("x_397_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_4798_to_fp16 = const()[name = string("op_4798_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135240832)))];
+            tensor<fp16, [1280]> var_4799_to_fp16 = const()[name = string("op_4799_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148348096)))];
+            tensor<fp16, [1, ?, 1280]> linear_175_cast_fp16 = linear(bias = var_4799_to_fp16, weight = var_4798_to_fp16, x = x_397_cast_fp16)[name = string("linear_175_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_399_cast_fp16 = add(x = x_393_cast_fp16, y = linear_175_cast_fp16)[name = string("x_399_cast_fp16")];
+            tensor<int32, [4]> k_cache_89_begin_0 = const()[name = string("k_cache_89_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_89_end_0 = const()[name = string("k_cache_89_end_0"), val = tensor<int32, [4]>([23, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_89_end_mask_0 = const()[name = string("k_cache_89_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_89_squeeze_mask_0 = const()[name = string("k_cache_89_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_89_cast_fp16 = slice_by_index(begin = k_cache_89_begin_0, end = k_cache_89_end_0, end_mask = k_cache_89_end_mask_0, squeeze_mask = k_cache_89_squeeze_mask_0, x = coreml_update_state_106)[name = string("k_cache_89_cast_fp16")];
+            tensor<int32, [4]> v_cache_89_begin_0 = const()[name = string("v_cache_89_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_89_end_0 = const()[name = string("v_cache_89_end_0"), val = tensor<int32, [4]>([23, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_89_end_mask_0 = const()[name = string("v_cache_89_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_89_squeeze_mask_0 = const()[name = string("v_cache_89_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_89_cast_fp16 = slice_by_index(begin = v_cache_89_begin_0, end = v_cache_89_end_0, end_mask = v_cache_89_end_mask_0, squeeze_mask = v_cache_89_squeeze_mask_0, x = coreml_update_state_107)[name = string("v_cache_89_cast_fp16")];
+            tensor<int32, [4]> k_cache_91_begin_0 = const()[name = string("k_cache_91_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_91_end_0 = const()[name = string("k_cache_91_end_0"), val = tensor<int32, [4]>([23, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_91_end_mask_0 = const()[name = string("k_cache_91_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_91_squeeze_mask_0 = const()[name = string("k_cache_91_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_91_cast_fp16 = slice_by_index(begin = k_cache_91_begin_0, end = k_cache_91_end_0, end_mask = k_cache_91_end_mask_0, squeeze_mask = k_cache_91_squeeze_mask_0, x = read_state_2)[name = string("k_cache_91_cast_fp16")];
+            tensor<int32, [4]> v_cache_91_begin_0 = const()[name = string("v_cache_91_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_91_end_0 = const()[name = string("v_cache_91_end_0"), val = tensor<int32, [4]>([23, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_91_end_mask_0 = const()[name = string("v_cache_91_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_91_squeeze_mask_0 = const()[name = string("v_cache_91_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_91_cast_fp16 = slice_by_index(begin = v_cache_91_begin_0, end = v_cache_91_end_0, end_mask = v_cache_91_end_mask_0, squeeze_mask = v_cache_91_squeeze_mask_0, x = read_state_3)[name = string("v_cache_91_cast_fp16")];
+            int32 var_4822 = const()[name = string("op_4822"), val = int32(-1)];
+            tensor<int32, [1]> var_4840_axes_0 = const()[name = string("op_4840_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148350720)))];
+            tensor<fp16, [1280]> blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148353344)))];
+            fp16 var_4828_to_fp16 = const()[name = string("op_4828_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_4840_cast_fp16 = layer_norm(axes = var_4840_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_399_cast_fp16)[name = string("op_4840_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4851_to_fp16 = const()[name = string("op_4851_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148355968)))];
+            tensor<fp16, [1280]> var_4852_to_fp16 = const()[name = string("op_4852_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151632832)))];
+            tensor<fp16, [1, ?, 1280]> linear_176_cast_fp16 = linear(bias = var_4852_to_fp16, weight = var_4851_to_fp16, x = var_4840_cast_fp16)[name = string("linear_176_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4855_to_fp16 = const()[name = string("op_4855_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151635456)))];
+            tensor<fp16, [1, ?, 1280]> linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4855_to_fp16, x = var_4840_cast_fp16)[name = string("linear_177_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4859_to_fp16 = const()[name = string("op_4859_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1154912320)))];
+            tensor<fp16, [1280]> var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158189184)))];
+            tensor<fp16, [1, ?, 1280]> linear_178_cast_fp16 = linear(bias = var_4860_to_fp16, weight = var_4859_to_fp16, x = var_4840_cast_fp16)[name = string("linear_178_cast_fp16")];
+            tensor<int32, [3]> var_4862_shape_cast_fp16 = shape(x = linear_176_cast_fp16)[name = string("op_4862_shape_cast_fp16")];
+            int32 gather_266_axis_0 = const()[name = string("gather_266_axis_0"), val = int32(0)];
+            int32 gather_266_batch_dims_0 = const()[name = string("gather_266_batch_dims_0"), val = int32(0)];
+            bool gather_266_validate_indices_0 = const()[name = string("gather_266_validate_indices_0"), val = bool(false)];
+            string var_4862_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4862_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_266_to_uint16 = const()[name = string("select_266_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4862_shape_cast_fp16_to_uint16 = cast(dtype = var_4862_shape_cast_fp16_to_uint16_dtype_0, x = var_4862_shape_cast_fp16)[name = string("cast_346")];
+            uint16 gather_266_cast_uint16 = gather(axis = gather_266_axis_0, batch_dims = gather_266_batch_dims_0, indices = select_266_to_uint16, validate_indices = gather_266_validate_indices_0, x = var_4862_shape_cast_fp16_to_uint16)[name = string("gather_266_cast_uint16")];
+            string gather_266_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_266_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_266_cast_uint16_to_int32 = cast(dtype = gather_266_cast_uint16_to_int32_dtype_0, x = gather_266_cast_uint16)[name = string("cast_345")];
+            int32 end_step_47 = add(x = offset, y = gather_266_cast_uint16_to_int32)[name = string("end_step_47")];
+            tensor<int32, [1]> expand_dims_352 = const()[name = string("expand_dims_352"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_354 = const()[name = string("expand_dims_354"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_355_axes_0 = const()[name = string("expand_dims_355_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_355 = expand_dims(axes = expand_dims_355_axes_0, x = end_step_47)[name = string("expand_dims_355")];
+            tensor<int32, [1]> concat_488_values0_0 = const()[name = string("concat_488_values0_0"), val = tensor<int32, [1]>([22])];
+            int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)];
+            bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (concat_488_values0_0, expand_dims_352, expand_dims_1, expand_dims_354))[name = string("concat_488")];
+            tensor<int32, [1]> concat_489_values0_0 = const()[name = string("concat_489_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)];
+            bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (concat_489_values0_0, concat_489_values1_0, expand_dims_355, concat_489_values3_0))[name = string("concat_489")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = k_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = k_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_23_stride_0, update = linear_177_cast_fp16, x = coreml_update_state_106)[name = string("k_cache1_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_23_cast_fp16, input = k_cache1)[name = string("coreml_update_state_108_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_108 = read_state(input = k_cache1)[name = string("coreml_update_state_108")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = v_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = v_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_23_stride_0, update = linear_178_cast_fp16, x = coreml_update_state_107)[name = string("v_cache1_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_23_cast_fp16, input = v_cache1)[name = string("coreml_update_state_109_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_109 = read_state(input = v_cache1)[name = string("coreml_update_state_109")];
+            int32 concat_494_values0_0 = const()[name = string("concat_494_values0_0"), val = int32(1)];
+            int32 concat_494_values2_0 = const()[name = string("concat_494_values2_0"), val = int32(1280)];
+            int32 concat_494_axis_0 = const()[name = string("concat_494_axis_0"), val = int32(0)];
+            bool concat_494_interleave_0 = const()[name = string("concat_494_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_494 = concat(axis = concat_494_axis_0, interleave = concat_494_interleave_0, values = (concat_494_values0_0, end_step_47, concat_494_values2_0))[name = string("concat_494")];
+            tensor<int32, [3]> var_4878_begin_0 = const()[name = string("op_4878_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4878_end_mask_0 = const()[name = string("op_4878_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4878_cast_fp16 = slice_by_index(begin = var_4878_begin_0, end = concat_494, end_mask = var_4878_end_mask_0, x = k_cache_89_cast_fp16)[name = string("op_4878_cast_fp16")];
+            tensor<int32, [3]> var_4881_begin_0 = const()[name = string("op_4881_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4881_end_mask_0 = const()[name = string("op_4881_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4881_cast_fp16 = slice_by_index(begin = var_4881_begin_0, end = concat_494, end_mask = var_4881_end_mask_0, x = v_cache_89_cast_fp16)[name = string("op_4881_cast_fp16")];
+            tensor<int32, [4]> concat_496x = const()[name = string("concat_496x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4891_cast_fp16 = reshape(shape = concat_496x, x = linear_176_cast_fp16)[name = string("op_4891_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_248_to_fp16 = const()[name = string("const_248_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_179_cast_fp16 = mul(x = var_4891_cast_fp16, y = const_248_to_fp16)[name = string("q_179_cast_fp16")];
+            tensor<int32, [4]> concat_497x = const()[name = string("concat_497x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4898_cast_fp16 = reshape(shape = concat_497x, x = var_4878_cast_fp16)[name = string("op_4898_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_225_cast_fp16 = mul(x = var_4898_cast_fp16, y = const_249_to_fp16)[name = string("k_225_cast_fp16")];
+            tensor<int32, [4]> concat_498x = const()[name = string("concat_498x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4905_cast_fp16 = reshape(shape = concat_498x, x = var_4881_cast_fp16)[name = string("op_4905_cast_fp16")];
+            tensor<int32, [4]> var_4906 = const()[name = string("op_4906"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_133_transpose_x_0 = const()[name = string("qk_133_transpose_x_0"), val = bool(false)];
+            bool qk_133_transpose_y_0 = const()[name = string("qk_133_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_345_perm_0 = const()[name = string("transpose_345_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_346_perm_0 = const()[name = string("transpose_346_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_346 = transpose(perm = transpose_346_perm_0, x = k_225_cast_fp16)[name = string("transpose_462")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_345 = transpose(perm = transpose_345_perm_0, x = q_179_cast_fp16)[name = string("transpose_463")];
+            tensor<fp16, [1, 20, ?, ?]> qk_133_cast_fp16 = matmul(transpose_x = qk_133_transpose_x_0, transpose_y = qk_133_transpose_y_0, x = transpose_345, y = transpose_346)[name = string("qk_133_cast_fp16")];
+            int32 concat_499_values1_0 = const()[name = string("concat_499_values1_0"), val = int32(448)];
+            int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)];
+            bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (gather_266_cast_uint16_to_int32, concat_499_values1_0))[name = string("concat_499")];
+            tensor<int32, [2]> var_4909_begin_0 = const()[name = string("op_4909_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4909_end_mask_0 = const()[name = string("op_4909_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4909_cast_fp16 = slice_by_index(begin = var_4909_begin_0, end = concat_499, end_mask = var_4909_end_mask_0, x = mask_to_fp16)[name = string("op_4909_cast_fp16")];
+            int32 concat_500_values0_0 = const()[name = string("concat_500_values0_0"), val = int32(0)];
+            int32 concat_500_axis_0 = const()[name = string("concat_500_axis_0"), val = int32(0)];
+            bool concat_500_interleave_0 = const()[name = string("concat_500_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_500 = concat(axis = concat_500_axis_0, interleave = concat_500_interleave_0, values = (concat_500_values0_0, gather_266_cast_uint16_to_int32))[name = string("concat_500")];
+            tensor<int32, [2]> var_4910_begin_0 = const()[name = string("op_4910_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4910_end_mask_0 = const()[name = string("op_4910_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4910_cast_fp16 = slice_by_index(begin = var_4910_begin_0, end = concat_500, end_mask = var_4910_end_mask_0, x = var_4909_cast_fp16)[name = string("op_4910_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_135_cast_fp16 = add(x = qk_133_cast_fp16, y = var_4910_cast_fp16)[name = string("qk_135_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4913_cast_fp16 = softmax(axis = var_4822, x = qk_135_cast_fp16)[name = string("op_4913_cast_fp16")];
+            bool var_4915_transpose_x_0 = const()[name = string("op_4915_transpose_x_0"), val = bool(false)];
+            bool var_4915_transpose_y_0 = const()[name = string("op_4915_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_225_cast_fp16 = transpose(perm = var_4906, x = var_4905_cast_fp16)[name = string("transpose_464")];
+            tensor<fp16, [1, 20, ?, 64]> var_4915_cast_fp16 = matmul(transpose_x = var_4915_transpose_x_0, transpose_y = var_4915_transpose_y_0, x = var_4913_cast_fp16, y = v_225_cast_fp16)[name = string("op_4915_cast_fp16")];
+            tensor<int32, [4]> var_4916 = const()[name = string("op_4916"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_501x = const()[name = string("concat_501x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4917_cast_fp16 = transpose(perm = var_4916, x = var_4915_cast_fp16)[name = string("transpose_461")];
+            tensor<fp16, [1, ?, 1280]> x_403_cast_fp16 = reshape(shape = concat_501x, x = var_4917_cast_fp16)[name = string("x_403_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4921_to_fp16 = const()[name = string("op_4921_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158191808)))];
+            tensor<fp16, [1280]> var_4922_to_fp16 = const()[name = string("op_4922_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161468672)))];
+            tensor<fp16, [1, ?, 1280]> linear_179_cast_fp16 = linear(bias = var_4922_to_fp16, weight = var_4921_to_fp16, x = x_403_cast_fp16)[name = string("linear_179_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_405_cast_fp16 = add(x = x_399_cast_fp16, y = linear_179_cast_fp16)[name = string("x_405_cast_fp16")];
+            tensor<int32, [1]> var_4929_axes_0 = const()[name = string("op_4929_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161471296)))];
+            tensor<fp16, [1280]> blocks_22_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161473920)))];
+            tensor<fp16, [1, ?, 1280]> var_4929_cast_fp16 = layer_norm(axes = var_4929_axes_0, beta = blocks_22_cross_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_cross_attn_ln_weight_to_fp16, x = x_405_cast_fp16)[name = string("op_4929_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161476544)))];
+            tensor<fp16, [1280]> var_4939_to_fp16 = const()[name = string("op_4939_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164753408)))];
+            tensor<fp16, [1, ?, 1280]> linear_180_cast_fp16 = linear(bias = var_4939_to_fp16, weight = var_4938_to_fp16, x = var_4929_cast_fp16)[name = string("linear_180_cast_fp16")];
+            tensor<int32, [3]> concat_502 = const()[name = string("concat_502"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_503 = const()[name = string("concat_503"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_227_internal_tensor_assign_1_stride_0 = const()[name = string("k_227_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_502, begin_mask = k_227_internal_tensor_assign_1_begin_mask_0, end = concat_503, end_mask = k_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_227_internal_tensor_assign_1_squeeze_mask_0, stride = k_227_internal_tensor_assign_1_stride_0, update = k_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("k_227_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_504 = const()[name = string("concat_504"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_505 = const()[name = string("concat_505"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_227_internal_tensor_assign_1_stride_0 = const()[name = string("v_227_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_504, begin_mask = v_227_internal_tensor_assign_1_begin_mask_0, end = concat_505, end_mask = v_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_227_internal_tensor_assign_1_squeeze_mask_0, stride = v_227_internal_tensor_assign_1_stride_0, update = v_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("v_227_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_506x = const()[name = string("concat_506x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4959_cast_fp16 = reshape(shape = concat_506x, x = linear_180_cast_fp16)[name = string("op_4959_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_183_cast_fp16 = mul(x = var_4959_cast_fp16, y = const_250_to_fp16)[name = string("q_183_cast_fp16")];
+            tensor<int32, [4]> var_4965 = const()[name = string("op_4965"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4966_cast_fp16 = reshape(shape = var_4965, x = k_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4966_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_229_cast_fp16 = mul(x = var_4966_cast_fp16, y = const_251_to_fp16)[name = string("k_229_cast_fp16")];
+            tensor<int32, [4]> var_4972 = const()[name = string("op_4972"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4973_cast_fp16 = reshape(shape = var_4972, x = v_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4973_cast_fp16")];
+            tensor<int32, [4]> var_4974 = const()[name = string("op_4974"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_137_transpose_x_0 = const()[name = string("qk_137_transpose_x_0"), val = bool(false)];
+            bool qk_137_transpose_y_0 = const()[name = string("qk_137_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_347_perm_0 = const()[name = string("transpose_347_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_348_perm_0 = const()[name = string("transpose_348_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_348 = transpose(perm = transpose_348_perm_0, x = k_229_cast_fp16)[name = string("transpose_458")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_347 = transpose(perm = transpose_347_perm_0, x = q_183_cast_fp16)[name = string("transpose_459")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_137_cast_fp16 = matmul(transpose_x = qk_137_transpose_x_0, transpose_y = qk_137_transpose_y_0, x = transpose_347, y = transpose_348)[name = string("qk_137_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4978_cast_fp16 = softmax(axis = var_4822, x = qk_137_cast_fp16)[name = string("op_4978_cast_fp16")];
+            bool var_4980_transpose_x_0 = const()[name = string("op_4980_transpose_x_0"), val = bool(false)];
+            bool var_4980_transpose_y_0 = const()[name = string("op_4980_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_229_cast_fp16 = transpose(perm = var_4974, x = var_4973_cast_fp16)[name = string("transpose_460")];
+            tensor<fp16, [1, 20, ?, 64]> var_4980_cast_fp16 = matmul(transpose_x = var_4980_transpose_x_0, transpose_y = var_4980_transpose_y_0, x = var_4978_cast_fp16, y = v_229_cast_fp16)[name = string("op_4980_cast_fp16")];
+            tensor<int32, [4]> var_4981 = const()[name = string("op_4981"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_507x = const()[name = string("concat_507x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4982_cast_fp16 = transpose(perm = var_4981, x = var_4980_cast_fp16)[name = string("transpose_457")];
+            tensor<fp16, [1, ?, 1280]> x_409_cast_fp16 = reshape(shape = concat_507x, x = var_4982_cast_fp16)[name = string("x_409_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4986_to_fp16 = const()[name = string("op_4986_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164756032)))];
+            tensor<fp16, [1280]> var_4987_to_fp16 = const()[name = string("op_4987_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168032896)))];
+            tensor<fp16, [1, ?, 1280]> linear_181_cast_fp16 = linear(bias = var_4987_to_fp16, weight = var_4986_to_fp16, x = x_409_cast_fp16)[name = string("linear_181_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_411_cast_fp16 = add(x = x_405_cast_fp16, y = linear_181_cast_fp16)[name = string("x_411_cast_fp16")];
+            tensor<int32, [1]> var_4994_axes_0 = const()[name = string("op_4994_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168035520)))];
+            tensor<fp16, [1280]> blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168038144)))];
+            tensor<fp16, [1, ?, 1280]> var_4994_cast_fp16 = layer_norm(axes = var_4994_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_411_cast_fp16)[name = string("op_4994_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5003_to_fp16 = const()[name = string("op_5003_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168040768)))];
+            tensor<fp16, [5120]> var_5004_to_fp16 = const()[name = string("op_5004_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181148032)))];
+            tensor<fp16, [1, ?, 5120]> linear_182_cast_fp16 = linear(bias = var_5004_to_fp16, weight = var_5003_to_fp16, x = var_4994_cast_fp16)[name = string("linear_182_cast_fp16")];
+            string x_415_mode_0 = const()[name = string("x_415_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_415_cast_fp16 = gelu(mode = x_415_mode_0, x = linear_182_cast_fp16)[name = string("x_415_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5009_to_fp16 = const()[name = string("op_5009_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181158336)))];
+            tensor<fp16, [1280]> var_5010_to_fp16 = const()[name = string("op_5010_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194265600)))];
+            tensor<fp16, [1, ?, 1280]> linear_183_cast_fp16 = linear(bias = var_5010_to_fp16, weight = var_5009_to_fp16, x = x_415_cast_fp16)[name = string("linear_183_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_417_cast_fp16 = add(x = x_411_cast_fp16, y = linear_183_cast_fp16)[name = string("x_417_cast_fp16")];
+            tensor<int32, [4]> k_cache_93_begin_0 = const()[name = string("k_cache_93_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_93_end_0 = const()[name = string("k_cache_93_end_0"), val = tensor<int32, [4]>([24, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_93_end_mask_0 = const()[name = string("k_cache_93_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_93_squeeze_mask_0 = const()[name = string("k_cache_93_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_93_cast_fp16 = slice_by_index(begin = k_cache_93_begin_0, end = k_cache_93_end_0, end_mask = k_cache_93_end_mask_0, squeeze_mask = k_cache_93_squeeze_mask_0, x = coreml_update_state_108)[name = string("k_cache_93_cast_fp16")];
+            tensor<int32, [4]> v_cache_93_begin_0 = const()[name = string("v_cache_93_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_93_end_0 = const()[name = string("v_cache_93_end_0"), val = tensor<int32, [4]>([24, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_93_end_mask_0 = const()[name = string("v_cache_93_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_93_squeeze_mask_0 = const()[name = string("v_cache_93_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_93_cast_fp16 = slice_by_index(begin = v_cache_93_begin_0, end = v_cache_93_end_0, end_mask = v_cache_93_end_mask_0, squeeze_mask = v_cache_93_squeeze_mask_0, x = coreml_update_state_109)[name = string("v_cache_93_cast_fp16")];
+            tensor<int32, [4]> k_cache_95_begin_0 = const()[name = string("k_cache_95_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_95_end_0 = const()[name = string("k_cache_95_end_0"), val = tensor<int32, [4]>([24, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_95_end_mask_0 = const()[name = string("k_cache_95_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_95_squeeze_mask_0 = const()[name = string("k_cache_95_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_95_cast_fp16 = slice_by_index(begin = k_cache_95_begin_0, end = k_cache_95_end_0, end_mask = k_cache_95_end_mask_0, squeeze_mask = k_cache_95_squeeze_mask_0, x = read_state_2)[name = string("k_cache_95_cast_fp16")];
+            tensor<int32, [4]> v_cache_95_begin_0 = const()[name = string("v_cache_95_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_95_end_0 = const()[name = string("v_cache_95_end_0"), val = tensor<int32, [4]>([24, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_95_end_mask_0 = const()[name = string("v_cache_95_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_95_squeeze_mask_0 = const()[name = string("v_cache_95_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_95_cast_fp16 = slice_by_index(begin = v_cache_95_begin_0, end = v_cache_95_end_0, end_mask = v_cache_95_end_mask_0, squeeze_mask = v_cache_95_squeeze_mask_0, x = read_state_3)[name = string("v_cache_95_cast_fp16")];
+            int32 var_5033 = const()[name = string("op_5033"), val = int32(-1)];
+            tensor<int32, [1]> var_5051_axes_0 = const()[name = string("op_5051_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194268224)))];
+            tensor<fp16, [1280]> blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194270848)))];
+            fp16 var_5039_to_fp16 = const()[name = string("op_5039_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5051_cast_fp16 = layer_norm(axes = var_5051_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_417_cast_fp16)[name = string("op_5051_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5062_to_fp16 = const()[name = string("op_5062_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194273472)))];
+            tensor<fp16, [1280]> var_5063_to_fp16 = const()[name = string("op_5063_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197550336)))];
+            tensor<fp16, [1, ?, 1280]> linear_184_cast_fp16 = linear(bias = var_5063_to_fp16, weight = var_5062_to_fp16, x = var_5051_cast_fp16)[name = string("linear_184_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5066_to_fp16 = const()[name = string("op_5066_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197552960)))];
+            tensor<fp16, [1, ?, 1280]> linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5066_to_fp16, x = var_5051_cast_fp16)[name = string("linear_185_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5070_to_fp16 = const()[name = string("op_5070_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1200829824)))];
+            tensor<fp16, [1280]> var_5071_to_fp16 = const()[name = string("op_5071_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204106688)))];
+            tensor<fp16, [1, ?, 1280]> linear_186_cast_fp16 = linear(bias = var_5071_to_fp16, weight = var_5070_to_fp16, x = var_5051_cast_fp16)[name = string("linear_186_cast_fp16")];
+            tensor<int32, [3]> var_5073_shape_cast_fp16 = shape(x = linear_184_cast_fp16)[name = string("op_5073_shape_cast_fp16")];
+            int32 gather_278_axis_0 = const()[name = string("gather_278_axis_0"), val = int32(0)];
+            int32 gather_278_batch_dims_0 = const()[name = string("gather_278_batch_dims_0"), val = int32(0)];
+            bool gather_278_validate_indices_0 = const()[name = string("gather_278_validate_indices_0"), val = bool(false)];
+            string var_5073_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5073_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_278_to_uint16 = const()[name = string("select_278_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5073_shape_cast_fp16_to_uint16 = cast(dtype = var_5073_shape_cast_fp16_to_uint16_dtype_0, x = var_5073_shape_cast_fp16)[name = string("cast_344")];
+            uint16 gather_278_cast_uint16 = gather(axis = gather_278_axis_0, batch_dims = gather_278_batch_dims_0, indices = select_278_to_uint16, validate_indices = gather_278_validate_indices_0, x = var_5073_shape_cast_fp16_to_uint16)[name = string("gather_278_cast_uint16")];
+            string gather_278_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_278_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_278_cast_uint16_to_int32 = cast(dtype = gather_278_cast_uint16_to_int32_dtype_0, x = gather_278_cast_uint16)[name = string("cast_343")];
+            int32 end_step_49 = add(x = offset, y = gather_278_cast_uint16_to_int32)[name = string("end_step_49")];
+            tensor<int32, [1]> expand_dims_368 = const()[name = string("expand_dims_368"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_370 = const()[name = string("expand_dims_370"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_371_axes_0 = const()[name = string("expand_dims_371_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_371 = expand_dims(axes = expand_dims_371_axes_0, x = end_step_49)[name = string("expand_dims_371")];
+            tensor<int32, [1]> concat_510_values0_0 = const()[name = string("concat_510_values0_0"), val = tensor<int32, [1]>([23])];
+            int32 concat_510_axis_0 = const()[name = string("concat_510_axis_0"), val = int32(0)];
+            bool concat_510_interleave_0 = const()[name = string("concat_510_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_510 = concat(axis = concat_510_axis_0, interleave = concat_510_interleave_0, values = (concat_510_values0_0, expand_dims_368, expand_dims_1, expand_dims_370))[name = string("concat_510")];
+            tensor<int32, [1]> concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_511_values3_0 = const()[name = string("concat_511_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)];
+            bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, expand_dims_371, concat_511_values3_0))[name = string("concat_511")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = k_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = k_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_24_stride_0, update = linear_185_cast_fp16, x = coreml_update_state_108)[name = string("k_cache1_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_24_cast_fp16, input = k_cache1)[name = string("coreml_update_state_110_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_110 = read_state(input = k_cache1)[name = string("coreml_update_state_110")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = v_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = v_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_24_stride_0, update = linear_186_cast_fp16, x = coreml_update_state_109)[name = string("v_cache1_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_24_cast_fp16, input = v_cache1)[name = string("coreml_update_state_111_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_111 = read_state(input = v_cache1)[name = string("coreml_update_state_111")];
+            int32 concat_516_values0_0 = const()[name = string("concat_516_values0_0"), val = int32(1)];
+            int32 concat_516_values2_0 = const()[name = string("concat_516_values2_0"), val = int32(1280)];
+            int32 concat_516_axis_0 = const()[name = string("concat_516_axis_0"), val = int32(0)];
+            bool concat_516_interleave_0 = const()[name = string("concat_516_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_516 = concat(axis = concat_516_axis_0, interleave = concat_516_interleave_0, values = (concat_516_values0_0, end_step_49, concat_516_values2_0))[name = string("concat_516")];
+            tensor<int32, [3]> var_5089_begin_0 = const()[name = string("op_5089_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5089_end_mask_0 = const()[name = string("op_5089_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = concat_516, end_mask = var_5089_end_mask_0, x = k_cache_93_cast_fp16)[name = string("op_5089_cast_fp16")];
+            tensor<int32, [3]> var_5092_begin_0 = const()[name = string("op_5092_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5092_end_mask_0 = const()[name = string("op_5092_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5092_cast_fp16 = slice_by_index(begin = var_5092_begin_0, end = concat_516, end_mask = var_5092_end_mask_0, x = v_cache_93_cast_fp16)[name = string("op_5092_cast_fp16")];
+            tensor<int32, [4]> concat_518x = const()[name = string("concat_518x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5102_cast_fp16 = reshape(shape = concat_518x, x = linear_184_cast_fp16)[name = string("op_5102_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_187_cast_fp16 = mul(x = var_5102_cast_fp16, y = const_252_to_fp16)[name = string("q_187_cast_fp16")];
+            tensor<int32, [4]> concat_519x = const()[name = string("concat_519x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5109_cast_fp16 = reshape(shape = concat_519x, x = var_5089_cast_fp16)[name = string("op_5109_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_235_cast_fp16 = mul(x = var_5109_cast_fp16, y = const_253_to_fp16)[name = string("k_235_cast_fp16")];
+            tensor<int32, [4]> concat_520x = const()[name = string("concat_520x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5116_cast_fp16 = reshape(shape = concat_520x, x = var_5092_cast_fp16)[name = string("op_5116_cast_fp16")];
+            tensor<int32, [4]> var_5117 = const()[name = string("op_5117"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_139_transpose_x_0 = const()[name = string("qk_139_transpose_x_0"), val = bool(false)];
+            bool qk_139_transpose_y_0 = const()[name = string("qk_139_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_349_perm_0 = const()[name = string("transpose_349_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_350_perm_0 = const()[name = string("transpose_350_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_350 = transpose(perm = transpose_350_perm_0, x = k_235_cast_fp16)[name = string("transpose_454")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_349 = transpose(perm = transpose_349_perm_0, x = q_187_cast_fp16)[name = string("transpose_455")];
+            tensor<fp16, [1, 20, ?, ?]> qk_139_cast_fp16 = matmul(transpose_x = qk_139_transpose_x_0, transpose_y = qk_139_transpose_y_0, x = transpose_349, y = transpose_350)[name = string("qk_139_cast_fp16")];
+            int32 concat_521_values1_0 = const()[name = string("concat_521_values1_0"), val = int32(448)];
+            int32 concat_521_axis_0 = const()[name = string("concat_521_axis_0"), val = int32(0)];
+            bool concat_521_interleave_0 = const()[name = string("concat_521_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_521 = concat(axis = concat_521_axis_0, interleave = concat_521_interleave_0, values = (gather_278_cast_uint16_to_int32, concat_521_values1_0))[name = string("concat_521")];
+            tensor<int32, [2]> var_5120_begin_0 = const()[name = string("op_5120_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5120_end_mask_0 = const()[name = string("op_5120_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5120_cast_fp16 = slice_by_index(begin = var_5120_begin_0, end = concat_521, end_mask = var_5120_end_mask_0, x = mask_to_fp16)[name = string("op_5120_cast_fp16")];
+            int32 concat_522_values0_0 = const()[name = string("concat_522_values0_0"), val = int32(0)];
+            int32 concat_522_axis_0 = const()[name = string("concat_522_axis_0"), val = int32(0)];
+            bool concat_522_interleave_0 = const()[name = string("concat_522_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_522 = concat(axis = concat_522_axis_0, interleave = concat_522_interleave_0, values = (concat_522_values0_0, gather_278_cast_uint16_to_int32))[name = string("concat_522")];
+            tensor<int32, [2]> var_5121_begin_0 = const()[name = string("op_5121_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5121_end_mask_0 = const()[name = string("op_5121_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5121_cast_fp16 = slice_by_index(begin = var_5121_begin_0, end = concat_522, end_mask = var_5121_end_mask_0, x = var_5120_cast_fp16)[name = string("op_5121_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_141_cast_fp16 = add(x = qk_139_cast_fp16, y = var_5121_cast_fp16)[name = string("qk_141_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5124_cast_fp16 = softmax(axis = var_5033, x = qk_141_cast_fp16)[name = string("op_5124_cast_fp16")];
+            bool var_5126_transpose_x_0 = const()[name = string("op_5126_transpose_x_0"), val = bool(false)];
+            bool var_5126_transpose_y_0 = const()[name = string("op_5126_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_235_cast_fp16 = transpose(perm = var_5117, x = var_5116_cast_fp16)[name = string("transpose_456")];
+            tensor<fp16, [1, 20, ?, 64]> var_5126_cast_fp16 = matmul(transpose_x = var_5126_transpose_x_0, transpose_y = var_5126_transpose_y_0, x = var_5124_cast_fp16, y = v_235_cast_fp16)[name = string("op_5126_cast_fp16")];
+            tensor<int32, [4]> var_5127 = const()[name = string("op_5127"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_523x = const()[name = string("concat_523x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5128_cast_fp16 = transpose(perm = var_5127, x = var_5126_cast_fp16)[name = string("transpose_453")];
+            tensor<fp16, [1, ?, 1280]> x_421_cast_fp16 = reshape(shape = concat_523x, x = var_5128_cast_fp16)[name = string("x_421_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5132_to_fp16 = const()[name = string("op_5132_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204109312)))];
+            tensor<fp16, [1280]> var_5133_to_fp16 = const()[name = string("op_5133_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207386176)))];
+            tensor<fp16, [1, ?, 1280]> linear_187_cast_fp16 = linear(bias = var_5133_to_fp16, weight = var_5132_to_fp16, x = x_421_cast_fp16)[name = string("linear_187_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_423_cast_fp16 = add(x = x_417_cast_fp16, y = linear_187_cast_fp16)[name = string("x_423_cast_fp16")];
+            tensor<int32, [1]> var_5140_axes_0 = const()[name = string("op_5140_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207388800)))];
+            tensor<fp16, [1280]> blocks_23_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207391424)))];
+            tensor<fp16, [1, ?, 1280]> var_5140_cast_fp16 = layer_norm(axes = var_5140_axes_0, beta = blocks_23_cross_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_cross_attn_ln_weight_to_fp16, x = x_423_cast_fp16)[name = string("op_5140_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5149_to_fp16 = const()[name = string("op_5149_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207394048)))];
+            tensor<fp16, [1280]> var_5150_to_fp16 = const()[name = string("op_5150_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210670912)))];
+            tensor<fp16, [1, ?, 1280]> linear_188_cast_fp16 = linear(bias = var_5150_to_fp16, weight = var_5149_to_fp16, x = var_5140_cast_fp16)[name = string("linear_188_cast_fp16")];
+            tensor<int32, [3]> concat_524 = const()[name = string("concat_524"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_525 = const()[name = string("concat_525"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_237_internal_tensor_assign_1_stride_0 = const()[name = string("k_237_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_524, begin_mask = k_237_internal_tensor_assign_1_begin_mask_0, end = concat_525, end_mask = k_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_237_internal_tensor_assign_1_squeeze_mask_0, stride = k_237_internal_tensor_assign_1_stride_0, update = k_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("k_237_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_526 = const()[name = string("concat_526"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_527 = const()[name = string("concat_527"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_237_internal_tensor_assign_1_stride_0 = const()[name = string("v_237_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_526, begin_mask = v_237_internal_tensor_assign_1_begin_mask_0, end = concat_527, end_mask = v_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_237_internal_tensor_assign_1_squeeze_mask_0, stride = v_237_internal_tensor_assign_1_stride_0, update = v_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("v_237_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_528x = const()[name = string("concat_528x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5170_cast_fp16 = reshape(shape = concat_528x, x = linear_188_cast_fp16)[name = string("op_5170_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_254_to_fp16 = const()[name = string("const_254_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_191_cast_fp16 = mul(x = var_5170_cast_fp16, y = const_254_to_fp16)[name = string("q_191_cast_fp16")];
+            tensor<int32, [4]> var_5176 = const()[name = string("op_5176"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5177_cast_fp16 = reshape(shape = var_5176, x = k_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5177_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_255_to_fp16 = const()[name = string("const_255_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_239_cast_fp16 = mul(x = var_5177_cast_fp16, y = const_255_to_fp16)[name = string("k_239_cast_fp16")];
+            tensor<int32, [4]> var_5183 = const()[name = string("op_5183"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5184_cast_fp16 = reshape(shape = var_5183, x = v_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5184_cast_fp16")];
+            tensor<int32, [4]> var_5185 = const()[name = string("op_5185"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_143_transpose_x_0 = const()[name = string("qk_143_transpose_x_0"), val = bool(false)];
+            bool qk_143_transpose_y_0 = const()[name = string("qk_143_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_351_perm_0 = const()[name = string("transpose_351_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_352_perm_0 = const()[name = string("transpose_352_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_352 = transpose(perm = transpose_352_perm_0, x = k_239_cast_fp16)[name = string("transpose_450")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_351 = transpose(perm = transpose_351_perm_0, x = q_191_cast_fp16)[name = string("transpose_451")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_143_cast_fp16 = matmul(transpose_x = qk_143_transpose_x_0, transpose_y = qk_143_transpose_y_0, x = transpose_351, y = transpose_352)[name = string("qk_143_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_5189_cast_fp16 = softmax(axis = var_5033, x = qk_143_cast_fp16)[name = string("op_5189_cast_fp16")];
+            bool var_5191_transpose_x_0 = const()[name = string("op_5191_transpose_x_0"), val = bool(false)];
+            bool var_5191_transpose_y_0 = const()[name = string("op_5191_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_239_cast_fp16 = transpose(perm = var_5185, x = var_5184_cast_fp16)[name = string("transpose_452")];
+            tensor<fp16, [1, 20, ?, 64]> var_5191_cast_fp16 = matmul(transpose_x = var_5191_transpose_x_0, transpose_y = var_5191_transpose_y_0, x = var_5189_cast_fp16, y = v_239_cast_fp16)[name = string("op_5191_cast_fp16")];
+            tensor<int32, [4]> var_5192 = const()[name = string("op_5192"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_529x = const()[name = string("concat_529x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5193_cast_fp16 = transpose(perm = var_5192, x = var_5191_cast_fp16)[name = string("transpose_449")];
+            tensor<fp16, [1, ?, 1280]> x_427_cast_fp16 = reshape(shape = concat_529x, x = var_5193_cast_fp16)[name = string("x_427_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5197_to_fp16 = const()[name = string("op_5197_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210673536)))];
+            tensor<fp16, [1280]> var_5198_to_fp16 = const()[name = string("op_5198_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213950400)))];
+            tensor<fp16, [1, ?, 1280]> linear_189_cast_fp16 = linear(bias = var_5198_to_fp16, weight = var_5197_to_fp16, x = x_427_cast_fp16)[name = string("linear_189_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_429_cast_fp16 = add(x = x_423_cast_fp16, y = linear_189_cast_fp16)[name = string("x_429_cast_fp16")];
+            tensor<int32, [1]> var_5205_axes_0 = const()[name = string("op_5205_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213953024)))];
+            tensor<fp16, [1280]> blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213955648)))];
+            tensor<fp16, [1, ?, 1280]> var_5205_cast_fp16 = layer_norm(axes = var_5205_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_429_cast_fp16)[name = string("op_5205_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5214_to_fp16 = const()[name = string("op_5214_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213958272)))];
+            tensor<fp16, [5120]> var_5215_to_fp16 = const()[name = string("op_5215_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227065536)))];
+            tensor<fp16, [1, ?, 5120]> linear_190_cast_fp16 = linear(bias = var_5215_to_fp16, weight = var_5214_to_fp16, x = var_5205_cast_fp16)[name = string("linear_190_cast_fp16")];
+            string x_433_mode_0 = const()[name = string("x_433_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_433_cast_fp16 = gelu(mode = x_433_mode_0, x = linear_190_cast_fp16)[name = string("x_433_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5220_to_fp16 = const()[name = string("op_5220_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227075840)))];
+            tensor<fp16, [1280]> var_5221_to_fp16 = const()[name = string("op_5221_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240183104)))];
+            tensor<fp16, [1, ?, 1280]> linear_191_cast_fp16 = linear(bias = var_5221_to_fp16, weight = var_5220_to_fp16, x = x_433_cast_fp16)[name = string("linear_191_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_435_cast_fp16 = add(x = x_429_cast_fp16, y = linear_191_cast_fp16)[name = string("x_435_cast_fp16")];
+            tensor<int32, [4]> k_cache_97_begin_0 = const()[name = string("k_cache_97_begin_0"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_97_end_0 = const()[name = string("k_cache_97_end_0"), val = tensor<int32, [4]>([25, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_97_end_mask_0 = const()[name = string("k_cache_97_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_97_squeeze_mask_0 = const()[name = string("k_cache_97_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_97_cast_fp16 = slice_by_index(begin = k_cache_97_begin_0, end = k_cache_97_end_0, end_mask = k_cache_97_end_mask_0, squeeze_mask = k_cache_97_squeeze_mask_0, x = coreml_update_state_110)[name = string("k_cache_97_cast_fp16")];
+            tensor<int32, [4]> v_cache_97_begin_0 = const()[name = string("v_cache_97_begin_0"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_97_end_0 = const()[name = string("v_cache_97_end_0"), val = tensor<int32, [4]>([25, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_97_end_mask_0 = const()[name = string("v_cache_97_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_97_squeeze_mask_0 = const()[name = string("v_cache_97_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_97_cast_fp16 = slice_by_index(begin = v_cache_97_begin_0, end = v_cache_97_end_0, end_mask = v_cache_97_end_mask_0, squeeze_mask = v_cache_97_squeeze_mask_0, x = coreml_update_state_111)[name = string("v_cache_97_cast_fp16")];
+            tensor<int32, [4]> k_cache_99_begin_0 = const()[name = string("k_cache_99_begin_0"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_99_end_0 = const()[name = string("k_cache_99_end_0"), val = tensor<int32, [4]>([25, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_99_end_mask_0 = const()[name = string("k_cache_99_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_99_squeeze_mask_0 = const()[name = string("k_cache_99_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_99_cast_fp16 = slice_by_index(begin = k_cache_99_begin_0, end = k_cache_99_end_0, end_mask = k_cache_99_end_mask_0, squeeze_mask = k_cache_99_squeeze_mask_0, x = read_state_2)[name = string("k_cache_99_cast_fp16")];
+            tensor<int32, [4]> v_cache_99_begin_0 = const()[name = string("v_cache_99_begin_0"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_99_end_0 = const()[name = string("v_cache_99_end_0"), val = tensor<int32, [4]>([25, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_99_end_mask_0 = const()[name = string("v_cache_99_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_99_squeeze_mask_0 = const()[name = string("v_cache_99_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_99_cast_fp16 = slice_by_index(begin = v_cache_99_begin_0, end = v_cache_99_end_0, end_mask = v_cache_99_end_mask_0, squeeze_mask = v_cache_99_squeeze_mask_0, x = read_state_3)[name = string("v_cache_99_cast_fp16")];
+            int32 var_5244 = const()[name = string("op_5244"), val = int32(-1)];
+            tensor<int32, [1]> var_5262_axes_0 = const()[name = string("op_5262_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240185728)))];
+            tensor<fp16, [1280]> blocks_24_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240188352)))];
+            fp16 var_5250_to_fp16 = const()[name = string("op_5250_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5262_cast_fp16 = layer_norm(axes = var_5262_axes_0, beta = blocks_24_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_attn_ln_weight_to_fp16, x = x_435_cast_fp16)[name = string("op_5262_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5273_to_fp16 = const()[name = string("op_5273_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240190976)))];
+            tensor<fp16, [1280]> var_5274_to_fp16 = const()[name = string("op_5274_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243467840)))];
+            tensor<fp16, [1, ?, 1280]> linear_192_cast_fp16 = linear(bias = var_5274_to_fp16, weight = var_5273_to_fp16, x = var_5262_cast_fp16)[name = string("linear_192_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5277_to_fp16 = const()[name = string("op_5277_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243470464)))];
+            tensor<fp16, [1, ?, 1280]> linear_193_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5277_to_fp16, x = var_5262_cast_fp16)[name = string("linear_193_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5281_to_fp16 = const()[name = string("op_5281_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1246747328)))];
+            tensor<fp16, [1280]> var_5282_to_fp16 = const()[name = string("op_5282_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250024192)))];
+            tensor<fp16, [1, ?, 1280]> linear_194_cast_fp16 = linear(bias = var_5282_to_fp16, weight = var_5281_to_fp16, x = var_5262_cast_fp16)[name = string("linear_194_cast_fp16")];
+            tensor<int32, [3]> var_5284_shape_cast_fp16 = shape(x = linear_192_cast_fp16)[name = string("op_5284_shape_cast_fp16")];
+            int32 gather_290_axis_0 = const()[name = string("gather_290_axis_0"), val = int32(0)];
+            int32 gather_290_batch_dims_0 = const()[name = string("gather_290_batch_dims_0"), val = int32(0)];
+            bool gather_290_validate_indices_0 = const()[name = string("gather_290_validate_indices_0"), val = bool(false)];
+            string var_5284_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5284_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_290_to_uint16 = const()[name = string("select_290_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5284_shape_cast_fp16_to_uint16 = cast(dtype = var_5284_shape_cast_fp16_to_uint16_dtype_0, x = var_5284_shape_cast_fp16)[name = string("cast_342")];
+            uint16 gather_290_cast_uint16 = gather(axis = gather_290_axis_0, batch_dims = gather_290_batch_dims_0, indices = select_290_to_uint16, validate_indices = gather_290_validate_indices_0, x = var_5284_shape_cast_fp16_to_uint16)[name = string("gather_290_cast_uint16")];
+            string gather_290_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_290_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_290_cast_uint16_to_int32 = cast(dtype = gather_290_cast_uint16_to_int32_dtype_0, x = gather_290_cast_uint16)[name = string("cast_341")];
+            int32 end_step_51 = add(x = offset, y = gather_290_cast_uint16_to_int32)[name = string("end_step_51")];
+            tensor<int32, [1]> expand_dims_384 = const()[name = string("expand_dims_384"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_386 = const()[name = string("expand_dims_386"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_387_axes_0 = const()[name = string("expand_dims_387_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_387 = expand_dims(axes = expand_dims_387_axes_0, x = end_step_51)[name = string("expand_dims_387")];
+            tensor<int32, [1]> concat_532_values0_0 = const()[name = string("concat_532_values0_0"), val = tensor<int32, [1]>([24])];
+            int32 concat_532_axis_0 = const()[name = string("concat_532_axis_0"), val = int32(0)];
+            bool concat_532_interleave_0 = const()[name = string("concat_532_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_532 = concat(axis = concat_532_axis_0, interleave = concat_532_interleave_0, values = (concat_532_values0_0, expand_dims_384, expand_dims_1, expand_dims_386))[name = string("concat_532")];
+            tensor<int32, [1]> concat_533_values0_0 = const()[name = string("concat_533_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_533_values1_0 = const()[name = string("concat_533_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_533_values3_0 = const()[name = string("concat_533_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_533_axis_0 = const()[name = string("concat_533_axis_0"), val = int32(0)];
+            bool concat_533_interleave_0 = const()[name = string("concat_533_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_533 = concat(axis = concat_533_axis_0, interleave = concat_533_interleave_0, values = (concat_533_values0_0, concat_533_values1_0, expand_dims_387, concat_533_values3_0))[name = string("concat_533")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_25_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = k_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = k_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_25_stride_0, update = linear_193_cast_fp16, x = coreml_update_state_110)[name = string("k_cache1_internal_tensor_assign_25_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_25_cast_fp16, input = k_cache1)[name = string("coreml_update_state_112_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_112 = read_state(input = k_cache1)[name = string("coreml_update_state_112")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_25_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = v_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = v_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_25_stride_0, update = linear_194_cast_fp16, x = coreml_update_state_111)[name = string("v_cache1_internal_tensor_assign_25_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_25_cast_fp16, input = v_cache1)[name = string("coreml_update_state_113_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_113 = read_state(input = v_cache1)[name = string("coreml_update_state_113")];
+            int32 concat_538_values0_0 = const()[name = string("concat_538_values0_0"), val = int32(1)];
+            int32 concat_538_values2_0 = const()[name = string("concat_538_values2_0"), val = int32(1280)];
+            int32 concat_538_axis_0 = const()[name = string("concat_538_axis_0"), val = int32(0)];
+            bool concat_538_interleave_0 = const()[name = string("concat_538_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_538 = concat(axis = concat_538_axis_0, interleave = concat_538_interleave_0, values = (concat_538_values0_0, end_step_51, concat_538_values2_0))[name = string("concat_538")];
+            tensor<int32, [3]> var_5300_begin_0 = const()[name = string("op_5300_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5300_end_mask_0 = const()[name = string("op_5300_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5300_cast_fp16 = slice_by_index(begin = var_5300_begin_0, end = concat_538, end_mask = var_5300_end_mask_0, x = k_cache_97_cast_fp16)[name = string("op_5300_cast_fp16")];
+            tensor<int32, [3]> var_5303_begin_0 = const()[name = string("op_5303_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5303_end_mask_0 = const()[name = string("op_5303_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5303_cast_fp16 = slice_by_index(begin = var_5303_begin_0, end = concat_538, end_mask = var_5303_end_mask_0, x = v_cache_97_cast_fp16)[name = string("op_5303_cast_fp16")];
+            tensor<int32, [4]> concat_540x = const()[name = string("concat_540x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5313_cast_fp16 = reshape(shape = concat_540x, x = linear_192_cast_fp16)[name = string("op_5313_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_195_cast_fp16 = mul(x = var_5313_cast_fp16, y = const_256_to_fp16)[name = string("q_195_cast_fp16")];
+            tensor<int32, [4]> concat_541x = const()[name = string("concat_541x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5320_cast_fp16 = reshape(shape = concat_541x, x = var_5300_cast_fp16)[name = string("op_5320_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_257_to_fp16 = const()[name = string("const_257_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_245_cast_fp16 = mul(x = var_5320_cast_fp16, y = const_257_to_fp16)[name = string("k_245_cast_fp16")];
+            tensor<int32, [4]> concat_542x = const()[name = string("concat_542x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5327_cast_fp16 = reshape(shape = concat_542x, x = var_5303_cast_fp16)[name = string("op_5327_cast_fp16")];
+            tensor<int32, [4]> var_5328 = const()[name = string("op_5328"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_145_transpose_x_0 = const()[name = string("qk_145_transpose_x_0"), val = bool(false)];
+            bool qk_145_transpose_y_0 = const()[name = string("qk_145_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_353_perm_0 = const()[name = string("transpose_353_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_354_perm_0 = const()[name = string("transpose_354_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_354 = transpose(perm = transpose_354_perm_0, x = k_245_cast_fp16)[name = string("transpose_446")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_353 = transpose(perm = transpose_353_perm_0, x = q_195_cast_fp16)[name = string("transpose_447")];
+            tensor<fp16, [1, 20, ?, ?]> qk_145_cast_fp16 = matmul(transpose_x = qk_145_transpose_x_0, transpose_y = qk_145_transpose_y_0, x = transpose_353, y = transpose_354)[name = string("qk_145_cast_fp16")];
+            int32 concat_543_values1_0 = const()[name = string("concat_543_values1_0"), val = int32(448)];
+            int32 concat_543_axis_0 = const()[name = string("concat_543_axis_0"), val = int32(0)];
+            bool concat_543_interleave_0 = const()[name = string("concat_543_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_543 = concat(axis = concat_543_axis_0, interleave = concat_543_interleave_0, values = (gather_290_cast_uint16_to_int32, concat_543_values1_0))[name = string("concat_543")];
+            tensor<int32, [2]> var_5331_begin_0 = const()[name = string("op_5331_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5331_end_mask_0 = const()[name = string("op_5331_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5331_cast_fp16 = slice_by_index(begin = var_5331_begin_0, end = concat_543, end_mask = var_5331_end_mask_0, x = mask_to_fp16)[name = string("op_5331_cast_fp16")];
+            int32 concat_544_values0_0 = const()[name = string("concat_544_values0_0"), val = int32(0)];
+            int32 concat_544_axis_0 = const()[name = string("concat_544_axis_0"), val = int32(0)];
+            bool concat_544_interleave_0 = const()[name = string("concat_544_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_544 = concat(axis = concat_544_axis_0, interleave = concat_544_interleave_0, values = (concat_544_values0_0, gather_290_cast_uint16_to_int32))[name = string("concat_544")];
+            tensor<int32, [2]> var_5332_begin_0 = const()[name = string("op_5332_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5332_end_mask_0 = const()[name = string("op_5332_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5332_cast_fp16 = slice_by_index(begin = var_5332_begin_0, end = concat_544, end_mask = var_5332_end_mask_0, x = var_5331_cast_fp16)[name = string("op_5332_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_147_cast_fp16 = add(x = qk_145_cast_fp16, y = var_5332_cast_fp16)[name = string("qk_147_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5335_cast_fp16 = softmax(axis = var_5244, x = qk_147_cast_fp16)[name = string("op_5335_cast_fp16")];
+            bool var_5337_transpose_x_0 = const()[name = string("op_5337_transpose_x_0"), val = bool(false)];
+            bool var_5337_transpose_y_0 = const()[name = string("op_5337_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_245_cast_fp16 = transpose(perm = var_5328, x = var_5327_cast_fp16)[name = string("transpose_448")];
+            tensor<fp16, [1, 20, ?, 64]> var_5337_cast_fp16 = matmul(transpose_x = var_5337_transpose_x_0, transpose_y = var_5337_transpose_y_0, x = var_5335_cast_fp16, y = v_245_cast_fp16)[name = string("op_5337_cast_fp16")];
+            tensor<int32, [4]> var_5338 = const()[name = string("op_5338"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_545x = const()[name = string("concat_545x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5339_cast_fp16 = transpose(perm = var_5338, x = var_5337_cast_fp16)[name = string("transpose_445")];
+            tensor<fp16, [1, ?, 1280]> x_439_cast_fp16 = reshape(shape = concat_545x, x = var_5339_cast_fp16)[name = string("x_439_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5343_to_fp16 = const()[name = string("op_5343_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250026816)))];
+            tensor<fp16, [1280]> var_5344_to_fp16 = const()[name = string("op_5344_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253303680)))];
+            tensor<fp16, [1, ?, 1280]> linear_195_cast_fp16 = linear(bias = var_5344_to_fp16, weight = var_5343_to_fp16, x = x_439_cast_fp16)[name = string("linear_195_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_441_cast_fp16 = add(x = x_435_cast_fp16, y = linear_195_cast_fp16)[name = string("x_441_cast_fp16")];
+            tensor<int32, [1]> var_5351_axes_0 = const()[name = string("op_5351_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253306304)))];
+            tensor<fp16, [1280]> blocks_24_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253308928)))];
+            tensor<fp16, [1, ?, 1280]> var_5351_cast_fp16 = layer_norm(axes = var_5351_axes_0, beta = blocks_24_cross_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_cross_attn_ln_weight_to_fp16, x = x_441_cast_fp16)[name = string("op_5351_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5360_to_fp16 = const()[name = string("op_5360_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253311552)))];
+            tensor<fp16, [1280]> var_5361_to_fp16 = const()[name = string("op_5361_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256588416)))];
+            tensor<fp16, [1, ?, 1280]> linear_196_cast_fp16 = linear(bias = var_5361_to_fp16, weight = var_5360_to_fp16, x = var_5351_cast_fp16)[name = string("linear_196_cast_fp16")];
+            tensor<int32, [3]> concat_546 = const()[name = string("concat_546"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_547 = const()[name = string("concat_547"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_247_internal_tensor_assign_1_stride_0 = const()[name = string("k_247_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_546, begin_mask = k_247_internal_tensor_assign_1_begin_mask_0, end = concat_547, end_mask = k_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_247_internal_tensor_assign_1_squeeze_mask_0, stride = k_247_internal_tensor_assign_1_stride_0, update = k_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("k_247_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_548 = const()[name = string("concat_548"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_549 = const()[name = string("concat_549"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_247_internal_tensor_assign_1_stride_0 = const()[name = string("v_247_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_548, begin_mask = v_247_internal_tensor_assign_1_begin_mask_0, end = concat_549, end_mask = v_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_247_internal_tensor_assign_1_squeeze_mask_0, stride = v_247_internal_tensor_assign_1_stride_0, update = v_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("v_247_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_550x = const()[name = string("concat_550x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5381_cast_fp16 = reshape(shape = concat_550x, x = linear_196_cast_fp16)[name = string("op_5381_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_199_cast_fp16 = mul(x = var_5381_cast_fp16, y = const_258_to_fp16)[name = string("q_199_cast_fp16")];
+            tensor<int32, [4]> var_5387 = const()[name = string("op_5387"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5388_cast_fp16 = reshape(shape = var_5387, x = k_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5388_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_249_cast_fp16 = mul(x = var_5388_cast_fp16, y = const_259_to_fp16)[name = string("k_249_cast_fp16")];
+            tensor<int32, [4]> var_5394 = const()[name = string("op_5394"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5395_cast_fp16 = reshape(shape = var_5394, x = v_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5395_cast_fp16")];
+            tensor<int32, [4]> var_5396 = const()[name = string("op_5396"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_149_transpose_x_0 = const()[name = string("qk_149_transpose_x_0"), val = bool(false)];
+            bool qk_149_transpose_y_0 = const()[name = string("qk_149_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_355_perm_0 = const()[name = string("transpose_355_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_356_perm_0 = const()[name = string("transpose_356_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_356 = transpose(perm = transpose_356_perm_0, x = k_249_cast_fp16)[name = string("transpose_442")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_355 = transpose(perm = transpose_355_perm_0, x = q_199_cast_fp16)[name = string("transpose_443")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_149_cast_fp16 = matmul(transpose_x = qk_149_transpose_x_0, transpose_y = qk_149_transpose_y_0, x = transpose_355, y = transpose_356)[name = string("qk_149_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_5400_cast_fp16 = softmax(axis = var_5244, x = qk_149_cast_fp16)[name = string("op_5400_cast_fp16")];
+            bool var_5402_transpose_x_0 = const()[name = string("op_5402_transpose_x_0"), val = bool(false)];
+            bool var_5402_transpose_y_0 = const()[name = string("op_5402_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_249_cast_fp16 = transpose(perm = var_5396, x = var_5395_cast_fp16)[name = string("transpose_444")];
+            tensor<fp16, [1, 20, ?, 64]> var_5402_cast_fp16 = matmul(transpose_x = var_5402_transpose_x_0, transpose_y = var_5402_transpose_y_0, x = var_5400_cast_fp16, y = v_249_cast_fp16)[name = string("op_5402_cast_fp16")];
+            tensor<int32, [4]> var_5403 = const()[name = string("op_5403"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_551x = const()[name = string("concat_551x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5404_cast_fp16 = transpose(perm = var_5403, x = var_5402_cast_fp16)[name = string("transpose_441")];
+            tensor<fp16, [1, ?, 1280]> x_445_cast_fp16 = reshape(shape = concat_551x, x = var_5404_cast_fp16)[name = string("x_445_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5408_to_fp16 = const()[name = string("op_5408_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256591040)))];
+            tensor<fp16, [1280]> var_5409_to_fp16 = const()[name = string("op_5409_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259867904)))];
+            tensor<fp16, [1, ?, 1280]> linear_197_cast_fp16 = linear(bias = var_5409_to_fp16, weight = var_5408_to_fp16, x = x_445_cast_fp16)[name = string("linear_197_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_447_cast_fp16 = add(x = x_441_cast_fp16, y = linear_197_cast_fp16)[name = string("x_447_cast_fp16")];
+            tensor<int32, [1]> var_5416_axes_0 = const()[name = string("op_5416_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_mlp_ln_weight_to_fp16 = const()[name = string("blocks_24_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259870528)))];
+            tensor<fp16, [1280]> blocks_24_mlp_ln_bias_to_fp16 = const()[name = string("blocks_24_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259873152)))];
+            tensor<fp16, [1, ?, 1280]> var_5416_cast_fp16 = layer_norm(axes = var_5416_axes_0, beta = blocks_24_mlp_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_mlp_ln_weight_to_fp16, x = x_447_cast_fp16)[name = string("op_5416_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5425_to_fp16 = const()[name = string("op_5425_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259875776)))];
+            tensor<fp16, [5120]> var_5426_to_fp16 = const()[name = string("op_5426_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272983040)))];
+            tensor<fp16, [1, ?, 5120]> linear_198_cast_fp16 = linear(bias = var_5426_to_fp16, weight = var_5425_to_fp16, x = var_5416_cast_fp16)[name = string("linear_198_cast_fp16")];
+            string x_451_mode_0 = const()[name = string("x_451_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_451_cast_fp16 = gelu(mode = x_451_mode_0, x = linear_198_cast_fp16)[name = string("x_451_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5431_to_fp16 = const()[name = string("op_5431_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272993344)))];
+            tensor<fp16, [1280]> var_5432_to_fp16 = const()[name = string("op_5432_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286100608)))];
+            tensor<fp16, [1, ?, 1280]> linear_199_cast_fp16 = linear(bias = var_5432_to_fp16, weight = var_5431_to_fp16, x = x_451_cast_fp16)[name = string("linear_199_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_453_cast_fp16 = add(x = x_447_cast_fp16, y = linear_199_cast_fp16)[name = string("x_453_cast_fp16")];
+            tensor<int32, [4]> k_cache_101_begin_0 = const()[name = string("k_cache_101_begin_0"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_101_end_0 = const()[name = string("k_cache_101_end_0"), val = tensor<int32, [4]>([26, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_101_end_mask_0 = const()[name = string("k_cache_101_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_101_squeeze_mask_0 = const()[name = string("k_cache_101_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_101_cast_fp16 = slice_by_index(begin = k_cache_101_begin_0, end = k_cache_101_end_0, end_mask = k_cache_101_end_mask_0, squeeze_mask = k_cache_101_squeeze_mask_0, x = coreml_update_state_112)[name = string("k_cache_101_cast_fp16")];
+            tensor<int32, [4]> v_cache_101_begin_0 = const()[name = string("v_cache_101_begin_0"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_101_end_0 = const()[name = string("v_cache_101_end_0"), val = tensor<int32, [4]>([26, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_101_end_mask_0 = const()[name = string("v_cache_101_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_101_squeeze_mask_0 = const()[name = string("v_cache_101_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_101_cast_fp16 = slice_by_index(begin = v_cache_101_begin_0, end = v_cache_101_end_0, end_mask = v_cache_101_end_mask_0, squeeze_mask = v_cache_101_squeeze_mask_0, x = coreml_update_state_113)[name = string("v_cache_101_cast_fp16")];
+            tensor<int32, [4]> k_cache_103_begin_0 = const()[name = string("k_cache_103_begin_0"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_103_end_0 = const()[name = string("k_cache_103_end_0"), val = tensor<int32, [4]>([26, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_103_end_mask_0 = const()[name = string("k_cache_103_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_103_squeeze_mask_0 = const()[name = string("k_cache_103_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_103_cast_fp16 = slice_by_index(begin = k_cache_103_begin_0, end = k_cache_103_end_0, end_mask = k_cache_103_end_mask_0, squeeze_mask = k_cache_103_squeeze_mask_0, x = read_state_2)[name = string("k_cache_103_cast_fp16")];
+            tensor<int32, [4]> v_cache_103_begin_0 = const()[name = string("v_cache_103_begin_0"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_103_end_0 = const()[name = string("v_cache_103_end_0"), val = tensor<int32, [4]>([26, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_103_end_mask_0 = const()[name = string("v_cache_103_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_103_squeeze_mask_0 = const()[name = string("v_cache_103_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_103_cast_fp16 = slice_by_index(begin = v_cache_103_begin_0, end = v_cache_103_end_0, end_mask = v_cache_103_end_mask_0, squeeze_mask = v_cache_103_squeeze_mask_0, x = read_state_3)[name = string("v_cache_103_cast_fp16")];
+            int32 var_5455 = const()[name = string("op_5455"), val = int32(-1)];
+            tensor<int32, [1]> var_5473_axes_0 = const()[name = string("op_5473_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286103232)))];
+            tensor<fp16, [1280]> blocks_25_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286105856)))];
+            fp16 var_5461_to_fp16 = const()[name = string("op_5461_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5473_cast_fp16 = layer_norm(axes = var_5473_axes_0, beta = blocks_25_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_attn_ln_weight_to_fp16, x = x_453_cast_fp16)[name = string("op_5473_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5484_to_fp16 = const()[name = string("op_5484_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286108480)))];
+            tensor<fp16, [1280]> var_5485_to_fp16 = const()[name = string("op_5485_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289385344)))];
+            tensor<fp16, [1, ?, 1280]> linear_200_cast_fp16 = linear(bias = var_5485_to_fp16, weight = var_5484_to_fp16, x = var_5473_cast_fp16)[name = string("linear_200_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5488_to_fp16 = const()[name = string("op_5488_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289387968)))];
+            tensor<fp16, [1, ?, 1280]> linear_201_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5488_to_fp16, x = var_5473_cast_fp16)[name = string("linear_201_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5492_to_fp16 = const()[name = string("op_5492_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1292664832)))];
+            tensor<fp16, [1280]> var_5493_to_fp16 = const()[name = string("op_5493_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295941696)))];
+            tensor<fp16, [1, ?, 1280]> linear_202_cast_fp16 = linear(bias = var_5493_to_fp16, weight = var_5492_to_fp16, x = var_5473_cast_fp16)[name = string("linear_202_cast_fp16")];
+            tensor<int32, [3]> var_5495_shape_cast_fp16 = shape(x = linear_200_cast_fp16)[name = string("op_5495_shape_cast_fp16")];
+            int32 gather_302_axis_0 = const()[name = string("gather_302_axis_0"), val = int32(0)];
+            int32 gather_302_batch_dims_0 = const()[name = string("gather_302_batch_dims_0"), val = int32(0)];
+            bool gather_302_validate_indices_0 = const()[name = string("gather_302_validate_indices_0"), val = bool(false)];
+            string var_5495_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5495_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_302_to_uint16 = const()[name = string("select_302_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5495_shape_cast_fp16_to_uint16 = cast(dtype = var_5495_shape_cast_fp16_to_uint16_dtype_0, x = var_5495_shape_cast_fp16)[name = string("cast_340")];
+            uint16 gather_302_cast_uint16 = gather(axis = gather_302_axis_0, batch_dims = gather_302_batch_dims_0, indices = select_302_to_uint16, validate_indices = gather_302_validate_indices_0, x = var_5495_shape_cast_fp16_to_uint16)[name = string("gather_302_cast_uint16")];
+            string gather_302_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_302_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_302_cast_uint16_to_int32 = cast(dtype = gather_302_cast_uint16_to_int32_dtype_0, x = gather_302_cast_uint16)[name = string("cast_339")];
+            int32 end_step_53 = add(x = offset, y = gather_302_cast_uint16_to_int32)[name = string("end_step_53")];
+            tensor<int32, [1]> expand_dims_400 = const()[name = string("expand_dims_400"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_402 = const()[name = string("expand_dims_402"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_403_axes_0 = const()[name = string("expand_dims_403_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_403 = expand_dims(axes = expand_dims_403_axes_0, x = end_step_53)[name = string("expand_dims_403")];
+            tensor<int32, [1]> concat_554_values0_0 = const()[name = string("concat_554_values0_0"), val = tensor<int32, [1]>([25])];
+            int32 concat_554_axis_0 = const()[name = string("concat_554_axis_0"), val = int32(0)];
+            bool concat_554_interleave_0 = const()[name = string("concat_554_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_554 = concat(axis = concat_554_axis_0, interleave = concat_554_interleave_0, values = (concat_554_values0_0, expand_dims_400, expand_dims_1, expand_dims_402))[name = string("concat_554")];
+            tensor<int32, [1]> concat_555_values0_0 = const()[name = string("concat_555_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_555_values1_0 = const()[name = string("concat_555_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_555_values3_0 = const()[name = string("concat_555_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_555_axis_0 = const()[name = string("concat_555_axis_0"), val = int32(0)];
+            bool concat_555_interleave_0 = const()[name = string("concat_555_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_555 = concat(axis = concat_555_axis_0, interleave = concat_555_interleave_0, values = (concat_555_values0_0, concat_555_values1_0, expand_dims_403, concat_555_values3_0))[name = string("concat_555")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_26_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = k_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = k_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_26_stride_0, update = linear_201_cast_fp16, x = coreml_update_state_112)[name = string("k_cache1_internal_tensor_assign_26_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_26_cast_fp16, input = k_cache1)[name = string("coreml_update_state_114_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_114 = read_state(input = k_cache1)[name = string("coreml_update_state_114")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_26_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = v_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = v_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_26_stride_0, update = linear_202_cast_fp16, x = coreml_update_state_113)[name = string("v_cache1_internal_tensor_assign_26_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_26_cast_fp16, input = v_cache1)[name = string("coreml_update_state_115_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_115 = read_state(input = v_cache1)[name = string("coreml_update_state_115")];
+            int32 concat_560_values0_0 = const()[name = string("concat_560_values0_0"), val = int32(1)];
+            int32 concat_560_values2_0 = const()[name = string("concat_560_values2_0"), val = int32(1280)];
+            int32 concat_560_axis_0 = const()[name = string("concat_560_axis_0"), val = int32(0)];
+            bool concat_560_interleave_0 = const()[name = string("concat_560_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_560 = concat(axis = concat_560_axis_0, interleave = concat_560_interleave_0, values = (concat_560_values0_0, end_step_53, concat_560_values2_0))[name = string("concat_560")];
+            tensor<int32, [3]> var_5511_begin_0 = const()[name = string("op_5511_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5511_end_mask_0 = const()[name = string("op_5511_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5511_cast_fp16 = slice_by_index(begin = var_5511_begin_0, end = concat_560, end_mask = var_5511_end_mask_0, x = k_cache_101_cast_fp16)[name = string("op_5511_cast_fp16")];
+            tensor<int32, [3]> var_5514_begin_0 = const()[name = string("op_5514_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5514_end_mask_0 = const()[name = string("op_5514_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5514_cast_fp16 = slice_by_index(begin = var_5514_begin_0, end = concat_560, end_mask = var_5514_end_mask_0, x = v_cache_101_cast_fp16)[name = string("op_5514_cast_fp16")];
+            tensor<int32, [4]> concat_562x = const()[name = string("concat_562x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5524_cast_fp16 = reshape(shape = concat_562x, x = linear_200_cast_fp16)[name = string("op_5524_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_203_cast_fp16 = mul(x = var_5524_cast_fp16, y = const_260_to_fp16)[name = string("q_203_cast_fp16")];
+            tensor<int32, [4]> concat_563x = const()[name = string("concat_563x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5531_cast_fp16 = reshape(shape = concat_563x, x = var_5511_cast_fp16)[name = string("op_5531_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_255_cast_fp16 = mul(x = var_5531_cast_fp16, y = const_261_to_fp16)[name = string("k_255_cast_fp16")];
+            tensor<int32, [4]> concat_564x = const()[name = string("concat_564x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5538_cast_fp16 = reshape(shape = concat_564x, x = var_5514_cast_fp16)[name = string("op_5538_cast_fp16")];
+            tensor<int32, [4]> var_5539 = const()[name = string("op_5539"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_151_transpose_x_0 = const()[name = string("qk_151_transpose_x_0"), val = bool(false)];
+            bool qk_151_transpose_y_0 = const()[name = string("qk_151_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_357_perm_0 = const()[name = string("transpose_357_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_358_perm_0 = const()[name = string("transpose_358_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_358 = transpose(perm = transpose_358_perm_0, x = k_255_cast_fp16)[name = string("transpose_438")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_357 = transpose(perm = transpose_357_perm_0, x = q_203_cast_fp16)[name = string("transpose_439")];
+            tensor<fp16, [1, 20, ?, ?]> qk_151_cast_fp16 = matmul(transpose_x = qk_151_transpose_x_0, transpose_y = qk_151_transpose_y_0, x = transpose_357, y = transpose_358)[name = string("qk_151_cast_fp16")];
+            int32 concat_565_values1_0 = const()[name = string("concat_565_values1_0"), val = int32(448)];
+            int32 concat_565_axis_0 = const()[name = string("concat_565_axis_0"), val = int32(0)];
+            bool concat_565_interleave_0 = const()[name = string("concat_565_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_565 = concat(axis = concat_565_axis_0, interleave = concat_565_interleave_0, values = (gather_302_cast_uint16_to_int32, concat_565_values1_0))[name = string("concat_565")];
+            tensor<int32, [2]> var_5542_begin_0 = const()[name = string("op_5542_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5542_end_mask_0 = const()[name = string("op_5542_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5542_cast_fp16 = slice_by_index(begin = var_5542_begin_0, end = concat_565, end_mask = var_5542_end_mask_0, x = mask_to_fp16)[name = string("op_5542_cast_fp16")];
+            int32 concat_566_values0_0 = const()[name = string("concat_566_values0_0"), val = int32(0)];
+            int32 concat_566_axis_0 = const()[name = string("concat_566_axis_0"), val = int32(0)];
+            bool concat_566_interleave_0 = const()[name = string("concat_566_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_566 = concat(axis = concat_566_axis_0, interleave = concat_566_interleave_0, values = (concat_566_values0_0, gather_302_cast_uint16_to_int32))[name = string("concat_566")];
+            tensor<int32, [2]> var_5543_begin_0 = const()[name = string("op_5543_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5543_end_mask_0 = const()[name = string("op_5543_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5543_cast_fp16 = slice_by_index(begin = var_5543_begin_0, end = concat_566, end_mask = var_5543_end_mask_0, x = var_5542_cast_fp16)[name = string("op_5543_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_153_cast_fp16 = add(x = qk_151_cast_fp16, y = var_5543_cast_fp16)[name = string("qk_153_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5546_cast_fp16 = softmax(axis = var_5455, x = qk_153_cast_fp16)[name = string("op_5546_cast_fp16")];
+            bool var_5548_transpose_x_0 = const()[name = string("op_5548_transpose_x_0"), val = bool(false)];
+            bool var_5548_transpose_y_0 = const()[name = string("op_5548_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_255_cast_fp16 = transpose(perm = var_5539, x = var_5538_cast_fp16)[name = string("transpose_440")];
+            tensor<fp16, [1, 20, ?, 64]> var_5548_cast_fp16 = matmul(transpose_x = var_5548_transpose_x_0, transpose_y = var_5548_transpose_y_0, x = var_5546_cast_fp16, y = v_255_cast_fp16)[name = string("op_5548_cast_fp16")];
+            tensor<int32, [4]> var_5549 = const()[name = string("op_5549"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_567x = const()[name = string("concat_567x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5550_cast_fp16 = transpose(perm = var_5549, x = var_5548_cast_fp16)[name = string("transpose_437")];
+            tensor<fp16, [1, ?, 1280]> x_457_cast_fp16 = reshape(shape = concat_567x, x = var_5550_cast_fp16)[name = string("x_457_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5554_to_fp16 = const()[name = string("op_5554_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295944320)))];
+            tensor<fp16, [1280]> var_5555_to_fp16 = const()[name = string("op_5555_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299221184)))];
+            tensor<fp16, [1, ?, 1280]> linear_203_cast_fp16 = linear(bias = var_5555_to_fp16, weight = var_5554_to_fp16, x = x_457_cast_fp16)[name = string("linear_203_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_459_cast_fp16 = add(x = x_453_cast_fp16, y = linear_203_cast_fp16)[name = string("x_459_cast_fp16")];
+            tensor<int32, [1]> var_5562_axes_0 = const()[name = string("op_5562_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299223808)))];
+            tensor<fp16, [1280]> blocks_25_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299226432)))];
+            tensor<fp16, [1, ?, 1280]> var_5562_cast_fp16 = layer_norm(axes = var_5562_axes_0, beta = blocks_25_cross_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_cross_attn_ln_weight_to_fp16, x = x_459_cast_fp16)[name = string("op_5562_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5571_to_fp16 = const()[name = string("op_5571_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299229056)))];
+            tensor<fp16, [1280]> var_5572_to_fp16 = const()[name = string("op_5572_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302505920)))];
+            tensor<fp16, [1, ?, 1280]> linear_204_cast_fp16 = linear(bias = var_5572_to_fp16, weight = var_5571_to_fp16, x = var_5562_cast_fp16)[name = string("linear_204_cast_fp16")];
+            tensor<int32, [3]> concat_568 = const()[name = string("concat_568"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_569 = const()[name = string("concat_569"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_257_internal_tensor_assign_1_stride_0 = const()[name = string("k_257_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_568, begin_mask = k_257_internal_tensor_assign_1_begin_mask_0, end = concat_569, end_mask = k_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_257_internal_tensor_assign_1_squeeze_mask_0, stride = k_257_internal_tensor_assign_1_stride_0, update = k_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("k_257_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_570 = const()[name = string("concat_570"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_571 = const()[name = string("concat_571"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_257_internal_tensor_assign_1_stride_0 = const()[name = string("v_257_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_570, begin_mask = v_257_internal_tensor_assign_1_begin_mask_0, end = concat_571, end_mask = v_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_257_internal_tensor_assign_1_squeeze_mask_0, stride = v_257_internal_tensor_assign_1_stride_0, update = v_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("v_257_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_572x = const()[name = string("concat_572x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5592_cast_fp16 = reshape(shape = concat_572x, x = linear_204_cast_fp16)[name = string("op_5592_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_207_cast_fp16 = mul(x = var_5592_cast_fp16, y = const_262_to_fp16)[name = string("q_207_cast_fp16")];
+            tensor<int32, [4]> var_5598 = const()[name = string("op_5598"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5599_cast_fp16 = reshape(shape = var_5598, x = k_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5599_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_259_cast_fp16 = mul(x = var_5599_cast_fp16, y = const_263_to_fp16)[name = string("k_259_cast_fp16")];
+            tensor<int32, [4]> var_5605 = const()[name = string("op_5605"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5606_cast_fp16 = reshape(shape = var_5605, x = v_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5606_cast_fp16")];
+            tensor<int32, [4]> var_5607 = const()[name = string("op_5607"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_155_transpose_x_0 = const()[name = string("qk_155_transpose_x_0"), val = bool(false)];
+            bool qk_155_transpose_y_0 = const()[name = string("qk_155_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_359_perm_0 = const()[name = string("transpose_359_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_360_perm_0 = const()[name = string("transpose_360_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_360 = transpose(perm = transpose_360_perm_0, x = k_259_cast_fp16)[name = string("transpose_434")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_359 = transpose(perm = transpose_359_perm_0, x = q_207_cast_fp16)[name = string("transpose_435")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_155_cast_fp16 = matmul(transpose_x = qk_155_transpose_x_0, transpose_y = qk_155_transpose_y_0, x = transpose_359, y = transpose_360)[name = string("qk_155_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_5611_cast_fp16 = softmax(axis = var_5455, x = qk_155_cast_fp16)[name = string("op_5611_cast_fp16")];
+            bool var_5613_transpose_x_0 = const()[name = string("op_5613_transpose_x_0"), val = bool(false)];
+            bool var_5613_transpose_y_0 = const()[name = string("op_5613_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_259_cast_fp16 = transpose(perm = var_5607, x = var_5606_cast_fp16)[name = string("transpose_436")];
+            tensor<fp16, [1, 20, ?, 64]> var_5613_cast_fp16 = matmul(transpose_x = var_5613_transpose_x_0, transpose_y = var_5613_transpose_y_0, x = var_5611_cast_fp16, y = v_259_cast_fp16)[name = string("op_5613_cast_fp16")];
+            tensor<int32, [4]> var_5614 = const()[name = string("op_5614"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_573x = const()[name = string("concat_573x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5615_cast_fp16 = transpose(perm = var_5614, x = var_5613_cast_fp16)[name = string("transpose_433")];
+            tensor<fp16, [1, ?, 1280]> x_463_cast_fp16 = reshape(shape = concat_573x, x = var_5615_cast_fp16)[name = string("x_463_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5619_to_fp16 = const()[name = string("op_5619_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302508544)))];
+            tensor<fp16, [1280]> var_5620_to_fp16 = const()[name = string("op_5620_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305785408)))];
+            tensor<fp16, [1, ?, 1280]> linear_205_cast_fp16 = linear(bias = var_5620_to_fp16, weight = var_5619_to_fp16, x = x_463_cast_fp16)[name = string("linear_205_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_465_cast_fp16 = add(x = x_459_cast_fp16, y = linear_205_cast_fp16)[name = string("x_465_cast_fp16")];
+            tensor<int32, [1]> var_5627_axes_0 = const()[name = string("op_5627_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_mlp_ln_weight_to_fp16 = const()[name = string("blocks_25_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305788032)))];
+            tensor<fp16, [1280]> blocks_25_mlp_ln_bias_to_fp16 = const()[name = string("blocks_25_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305790656)))];
+            tensor<fp16, [1, ?, 1280]> var_5627_cast_fp16 = layer_norm(axes = var_5627_axes_0, beta = blocks_25_mlp_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_mlp_ln_weight_to_fp16, x = x_465_cast_fp16)[name = string("op_5627_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5636_to_fp16 = const()[name = string("op_5636_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305793280)))];
+            tensor<fp16, [5120]> var_5637_to_fp16 = const()[name = string("op_5637_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318900544)))];
+            tensor<fp16, [1, ?, 5120]> linear_206_cast_fp16 = linear(bias = var_5637_to_fp16, weight = var_5636_to_fp16, x = var_5627_cast_fp16)[name = string("linear_206_cast_fp16")];
+            string x_469_mode_0 = const()[name = string("x_469_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_469_cast_fp16 = gelu(mode = x_469_mode_0, x = linear_206_cast_fp16)[name = string("x_469_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5642_to_fp16 = const()[name = string("op_5642_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318910848)))];
+            tensor<fp16, [1280]> var_5643_to_fp16 = const()[name = string("op_5643_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332018112)))];
+            tensor<fp16, [1, ?, 1280]> linear_207_cast_fp16 = linear(bias = var_5643_to_fp16, weight = var_5642_to_fp16, x = x_469_cast_fp16)[name = string("linear_207_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_471_cast_fp16 = add(x = x_465_cast_fp16, y = linear_207_cast_fp16)[name = string("x_471_cast_fp16")];
+            tensor<int32, [4]> k_cache_105_begin_0 = const()[name = string("k_cache_105_begin_0"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_105_end_0 = const()[name = string("k_cache_105_end_0"), val = tensor<int32, [4]>([27, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_105_end_mask_0 = const()[name = string("k_cache_105_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_105_squeeze_mask_0 = const()[name = string("k_cache_105_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_105_cast_fp16 = slice_by_index(begin = k_cache_105_begin_0, end = k_cache_105_end_0, end_mask = k_cache_105_end_mask_0, squeeze_mask = k_cache_105_squeeze_mask_0, x = coreml_update_state_114)[name = string("k_cache_105_cast_fp16")];
+            tensor<int32, [4]> v_cache_105_begin_0 = const()[name = string("v_cache_105_begin_0"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_105_end_0 = const()[name = string("v_cache_105_end_0"), val = tensor<int32, [4]>([27, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_105_end_mask_0 = const()[name = string("v_cache_105_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_105_squeeze_mask_0 = const()[name = string("v_cache_105_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_105_cast_fp16 = slice_by_index(begin = v_cache_105_begin_0, end = v_cache_105_end_0, end_mask = v_cache_105_end_mask_0, squeeze_mask = v_cache_105_squeeze_mask_0, x = coreml_update_state_115)[name = string("v_cache_105_cast_fp16")];
+            tensor<int32, [4]> k_cache_107_begin_0 = const()[name = string("k_cache_107_begin_0"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_107_end_0 = const()[name = string("k_cache_107_end_0"), val = tensor<int32, [4]>([27, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_107_end_mask_0 = const()[name = string("k_cache_107_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_107_squeeze_mask_0 = const()[name = string("k_cache_107_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_107_cast_fp16 = slice_by_index(begin = k_cache_107_begin_0, end = k_cache_107_end_0, end_mask = k_cache_107_end_mask_0, squeeze_mask = k_cache_107_squeeze_mask_0, x = read_state_2)[name = string("k_cache_107_cast_fp16")];
+            tensor<int32, [4]> v_cache_107_begin_0 = const()[name = string("v_cache_107_begin_0"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_107_end_0 = const()[name = string("v_cache_107_end_0"), val = tensor<int32, [4]>([27, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_107_end_mask_0 = const()[name = string("v_cache_107_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_107_squeeze_mask_0 = const()[name = string("v_cache_107_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_107_cast_fp16 = slice_by_index(begin = v_cache_107_begin_0, end = v_cache_107_end_0, end_mask = v_cache_107_end_mask_0, squeeze_mask = v_cache_107_squeeze_mask_0, x = read_state_3)[name = string("v_cache_107_cast_fp16")];
+            int32 var_5666 = const()[name = string("op_5666"), val = int32(-1)];
+            tensor<int32, [1]> var_5684_axes_0 = const()[name = string("op_5684_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332020736)))];
+            tensor<fp16, [1280]> blocks_26_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332023360)))];
+            fp16 var_5672_to_fp16 = const()[name = string("op_5672_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5684_cast_fp16 = layer_norm(axes = var_5684_axes_0, beta = blocks_26_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_attn_ln_weight_to_fp16, x = x_471_cast_fp16)[name = string("op_5684_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5695_to_fp16 = const()[name = string("op_5695_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332025984)))];
+            tensor<fp16, [1280]> var_5696_to_fp16 = const()[name = string("op_5696_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335302848)))];
+            tensor<fp16, [1, ?, 1280]> linear_208_cast_fp16 = linear(bias = var_5696_to_fp16, weight = var_5695_to_fp16, x = var_5684_cast_fp16)[name = string("linear_208_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5699_to_fp16 = const()[name = string("op_5699_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335305472)))];
+            tensor<fp16, [1, ?, 1280]> linear_209_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5699_to_fp16, x = var_5684_cast_fp16)[name = string("linear_209_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5703_to_fp16 = const()[name = string("op_5703_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1338582336)))];
+            tensor<fp16, [1280]> var_5704_to_fp16 = const()[name = string("op_5704_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341859200)))];
+            tensor<fp16, [1, ?, 1280]> linear_210_cast_fp16 = linear(bias = var_5704_to_fp16, weight = var_5703_to_fp16, x = var_5684_cast_fp16)[name = string("linear_210_cast_fp16")];
+            tensor<int32, [3]> var_5706_shape_cast_fp16 = shape(x = linear_208_cast_fp16)[name = string("op_5706_shape_cast_fp16")];
+            int32 gather_314_axis_0 = const()[name = string("gather_314_axis_0"), val = int32(0)];
+            int32 gather_314_batch_dims_0 = const()[name = string("gather_314_batch_dims_0"), val = int32(0)];
+            bool gather_314_validate_indices_0 = const()[name = string("gather_314_validate_indices_0"), val = bool(false)];
+            string var_5706_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5706_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_314_to_uint16 = const()[name = string("select_314_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5706_shape_cast_fp16_to_uint16 = cast(dtype = var_5706_shape_cast_fp16_to_uint16_dtype_0, x = var_5706_shape_cast_fp16)[name = string("cast_338")];
+            uint16 gather_314_cast_uint16 = gather(axis = gather_314_axis_0, batch_dims = gather_314_batch_dims_0, indices = select_314_to_uint16, validate_indices = gather_314_validate_indices_0, x = var_5706_shape_cast_fp16_to_uint16)[name = string("gather_314_cast_uint16")];
+            string gather_314_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_314_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_314_cast_uint16_to_int32 = cast(dtype = gather_314_cast_uint16_to_int32_dtype_0, x = gather_314_cast_uint16)[name = string("cast_337")];
+            int32 end_step_55 = add(x = offset, y = gather_314_cast_uint16_to_int32)[name = string("end_step_55")];
+            tensor<int32, [1]> expand_dims_416 = const()[name = string("expand_dims_416"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_418 = const()[name = string("expand_dims_418"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_419_axes_0 = const()[name = string("expand_dims_419_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_419 = expand_dims(axes = expand_dims_419_axes_0, x = end_step_55)[name = string("expand_dims_419")];
+            tensor<int32, [1]> concat_576_values0_0 = const()[name = string("concat_576_values0_0"), val = tensor<int32, [1]>([26])];
+            int32 concat_576_axis_0 = const()[name = string("concat_576_axis_0"), val = int32(0)];
+            bool concat_576_interleave_0 = const()[name = string("concat_576_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_576 = concat(axis = concat_576_axis_0, interleave = concat_576_interleave_0, values = (concat_576_values0_0, expand_dims_416, expand_dims_1, expand_dims_418))[name = string("concat_576")];
+            tensor<int32, [1]> concat_577_values0_0 = const()[name = string("concat_577_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_577_values1_0 = const()[name = string("concat_577_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_577_values3_0 = const()[name = string("concat_577_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_577_axis_0 = const()[name = string("concat_577_axis_0"), val = int32(0)];
+            bool concat_577_interleave_0 = const()[name = string("concat_577_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_577 = concat(axis = concat_577_axis_0, interleave = concat_577_interleave_0, values = (concat_577_values0_0, concat_577_values1_0, expand_dims_419, concat_577_values3_0))[name = string("concat_577")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_27_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = k_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = k_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_27_stride_0, update = linear_209_cast_fp16, x = coreml_update_state_114)[name = string("k_cache1_internal_tensor_assign_27_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_27_cast_fp16, input = k_cache1)[name = string("coreml_update_state_116_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_116 = read_state(input = k_cache1)[name = string("coreml_update_state_116")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_27_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = v_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = v_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_27_stride_0, update = linear_210_cast_fp16, x = coreml_update_state_115)[name = string("v_cache1_internal_tensor_assign_27_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_27_cast_fp16, input = v_cache1)[name = string("coreml_update_state_117_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_117 = read_state(input = v_cache1)[name = string("coreml_update_state_117")];
+            int32 concat_582_values0_0 = const()[name = string("concat_582_values0_0"), val = int32(1)];
+            int32 concat_582_values2_0 = const()[name = string("concat_582_values2_0"), val = int32(1280)];
+            int32 concat_582_axis_0 = const()[name = string("concat_582_axis_0"), val = int32(0)];
+            bool concat_582_interleave_0 = const()[name = string("concat_582_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_582 = concat(axis = concat_582_axis_0, interleave = concat_582_interleave_0, values = (concat_582_values0_0, end_step_55, concat_582_values2_0))[name = string("concat_582")];
+            tensor<int32, [3]> var_5722_begin_0 = const()[name = string("op_5722_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5722_end_mask_0 = const()[name = string("op_5722_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5722_cast_fp16 = slice_by_index(begin = var_5722_begin_0, end = concat_582, end_mask = var_5722_end_mask_0, x = k_cache_105_cast_fp16)[name = string("op_5722_cast_fp16")];
+            tensor<int32, [3]> var_5725_begin_0 = const()[name = string("op_5725_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5725_end_mask_0 = const()[name = string("op_5725_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5725_cast_fp16 = slice_by_index(begin = var_5725_begin_0, end = concat_582, end_mask = var_5725_end_mask_0, x = v_cache_105_cast_fp16)[name = string("op_5725_cast_fp16")];
+            tensor<int32, [4]> concat_584x = const()[name = string("concat_584x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5735_cast_fp16 = reshape(shape = concat_584x, x = linear_208_cast_fp16)[name = string("op_5735_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_264_to_fp16 = const()[name = string("const_264_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_211_cast_fp16 = mul(x = var_5735_cast_fp16, y = const_264_to_fp16)[name = string("q_211_cast_fp16")];
+            tensor<int32, [4]> concat_585x = const()[name = string("concat_585x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5742_cast_fp16 = reshape(shape = concat_585x, x = var_5722_cast_fp16)[name = string("op_5742_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_265_cast_fp16 = mul(x = var_5742_cast_fp16, y = const_265_to_fp16)[name = string("k_265_cast_fp16")];
+            tensor<int32, [4]> concat_586x = const()[name = string("concat_586x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5749_cast_fp16 = reshape(shape = concat_586x, x = var_5725_cast_fp16)[name = string("op_5749_cast_fp16")];
+            tensor<int32, [4]> var_5750 = const()[name = string("op_5750"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_157_transpose_x_0 = const()[name = string("qk_157_transpose_x_0"), val = bool(false)];
+            bool qk_157_transpose_y_0 = const()[name = string("qk_157_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_361_perm_0 = const()[name = string("transpose_361_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_362_perm_0 = const()[name = string("transpose_362_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_362 = transpose(perm = transpose_362_perm_0, x = k_265_cast_fp16)[name = string("transpose_430")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_361 = transpose(perm = transpose_361_perm_0, x = q_211_cast_fp16)[name = string("transpose_431")];
+            tensor<fp16, [1, 20, ?, ?]> qk_157_cast_fp16 = matmul(transpose_x = qk_157_transpose_x_0, transpose_y = qk_157_transpose_y_0, x = transpose_361, y = transpose_362)[name = string("qk_157_cast_fp16")];
+            int32 concat_587_values1_0 = const()[name = string("concat_587_values1_0"), val = int32(448)];
+            int32 concat_587_axis_0 = const()[name = string("concat_587_axis_0"), val = int32(0)];
+            bool concat_587_interleave_0 = const()[name = string("concat_587_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_587 = concat(axis = concat_587_axis_0, interleave = concat_587_interleave_0, values = (gather_314_cast_uint16_to_int32, concat_587_values1_0))[name = string("concat_587")];
+            tensor<int32, [2]> var_5753_begin_0 = const()[name = string("op_5753_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5753_end_mask_0 = const()[name = string("op_5753_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5753_cast_fp16 = slice_by_index(begin = var_5753_begin_0, end = concat_587, end_mask = var_5753_end_mask_0, x = mask_to_fp16)[name = string("op_5753_cast_fp16")];
+            int32 concat_588_values0_0 = const()[name = string("concat_588_values0_0"), val = int32(0)];
+            int32 concat_588_axis_0 = const()[name = string("concat_588_axis_0"), val = int32(0)];
+            bool concat_588_interleave_0 = const()[name = string("concat_588_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_588 = concat(axis = concat_588_axis_0, interleave = concat_588_interleave_0, values = (concat_588_values0_0, gather_314_cast_uint16_to_int32))[name = string("concat_588")];
+            tensor<int32, [2]> var_5754_begin_0 = const()[name = string("op_5754_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5754_end_mask_0 = const()[name = string("op_5754_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5754_cast_fp16 = slice_by_index(begin = var_5754_begin_0, end = concat_588, end_mask = var_5754_end_mask_0, x = var_5753_cast_fp16)[name = string("op_5754_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_159_cast_fp16 = add(x = qk_157_cast_fp16, y = var_5754_cast_fp16)[name = string("qk_159_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5757_cast_fp16 = softmax(axis = var_5666, x = qk_159_cast_fp16)[name = string("op_5757_cast_fp16")];
+            bool var_5759_transpose_x_0 = const()[name = string("op_5759_transpose_x_0"), val = bool(false)];
+            bool var_5759_transpose_y_0 = const()[name = string("op_5759_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_265_cast_fp16 = transpose(perm = var_5750, x = var_5749_cast_fp16)[name = string("transpose_432")];
+            tensor<fp16, [1, 20, ?, 64]> var_5759_cast_fp16 = matmul(transpose_x = var_5759_transpose_x_0, transpose_y = var_5759_transpose_y_0, x = var_5757_cast_fp16, y = v_265_cast_fp16)[name = string("op_5759_cast_fp16")];
+            tensor<int32, [4]> var_5760 = const()[name = string("op_5760"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_589x = const()[name = string("concat_589x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5761_cast_fp16 = transpose(perm = var_5760, x = var_5759_cast_fp16)[name = string("transpose_429")];
+            tensor<fp16, [1, ?, 1280]> x_475_cast_fp16 = reshape(shape = concat_589x, x = var_5761_cast_fp16)[name = string("x_475_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5765_to_fp16 = const()[name = string("op_5765_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341861824)))];
+            tensor<fp16, [1280]> var_5766_to_fp16 = const()[name = string("op_5766_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345138688)))];
+            tensor<fp16, [1, ?, 1280]> linear_211_cast_fp16 = linear(bias = var_5766_to_fp16, weight = var_5765_to_fp16, x = x_475_cast_fp16)[name = string("linear_211_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_477_cast_fp16 = add(x = x_471_cast_fp16, y = linear_211_cast_fp16)[name = string("x_477_cast_fp16")];
+            tensor<int32, [1]> var_5773_axes_0 = const()[name = string("op_5773_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345141312)))];
+            tensor<fp16, [1280]> blocks_26_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345143936)))];
+            tensor<fp16, [1, ?, 1280]> var_5773_cast_fp16 = layer_norm(axes = var_5773_axes_0, beta = blocks_26_cross_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_cross_attn_ln_weight_to_fp16, x = x_477_cast_fp16)[name = string("op_5773_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5782_to_fp16 = const()[name = string("op_5782_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345146560)))];
+            tensor<fp16, [1280]> var_5783_to_fp16 = const()[name = string("op_5783_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348423424)))];
+            tensor<fp16, [1, ?, 1280]> linear_212_cast_fp16 = linear(bias = var_5783_to_fp16, weight = var_5782_to_fp16, x = var_5773_cast_fp16)[name = string("linear_212_cast_fp16")];
+            tensor<int32, [3]> concat_590 = const()[name = string("concat_590"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_591 = const()[name = string("concat_591"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_267_internal_tensor_assign_1_stride_0 = const()[name = string("k_267_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_590, begin_mask = k_267_internal_tensor_assign_1_begin_mask_0, end = concat_591, end_mask = k_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_267_internal_tensor_assign_1_squeeze_mask_0, stride = k_267_internal_tensor_assign_1_stride_0, update = k_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("k_267_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_592 = const()[name = string("concat_592"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_593 = const()[name = string("concat_593"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_267_internal_tensor_assign_1_stride_0 = const()[name = string("v_267_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_592, begin_mask = v_267_internal_tensor_assign_1_begin_mask_0, end = concat_593, end_mask = v_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_267_internal_tensor_assign_1_squeeze_mask_0, stride = v_267_internal_tensor_assign_1_stride_0, update = v_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("v_267_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_594x = const()[name = string("concat_594x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5803_cast_fp16 = reshape(shape = concat_594x, x = linear_212_cast_fp16)[name = string("op_5803_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_266_to_fp16 = const()[name = string("const_266_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_215_cast_fp16 = mul(x = var_5803_cast_fp16, y = const_266_to_fp16)[name = string("q_215_cast_fp16")];
+            tensor<int32, [4]> var_5809 = const()[name = string("op_5809"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5810_cast_fp16 = reshape(shape = var_5809, x = k_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5810_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_269_cast_fp16 = mul(x = var_5810_cast_fp16, y = const_267_to_fp16)[name = string("k_269_cast_fp16")];
+            tensor<int32, [4]> var_5816 = const()[name = string("op_5816"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5817_cast_fp16 = reshape(shape = var_5816, x = v_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5817_cast_fp16")];
+            tensor<int32, [4]> var_5818 = const()[name = string("op_5818"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_161_transpose_x_0 = const()[name = string("qk_161_transpose_x_0"), val = bool(false)];
+            bool qk_161_transpose_y_0 = const()[name = string("qk_161_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_363_perm_0 = const()[name = string("transpose_363_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_364_perm_0 = const()[name = string("transpose_364_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_364 = transpose(perm = transpose_364_perm_0, x = k_269_cast_fp16)[name = string("transpose_426")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_363 = transpose(perm = transpose_363_perm_0, x = q_215_cast_fp16)[name = string("transpose_427")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_161_cast_fp16 = matmul(transpose_x = qk_161_transpose_x_0, transpose_y = qk_161_transpose_y_0, x = transpose_363, y = transpose_364)[name = string("qk_161_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_5822_cast_fp16 = softmax(axis = var_5666, x = qk_161_cast_fp16)[name = string("op_5822_cast_fp16")];
+            bool var_5824_transpose_x_0 = const()[name = string("op_5824_transpose_x_0"), val = bool(false)];
+            bool var_5824_transpose_y_0 = const()[name = string("op_5824_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_269_cast_fp16 = transpose(perm = var_5818, x = var_5817_cast_fp16)[name = string("transpose_428")];
+            tensor<fp16, [1, 20, ?, 64]> var_5824_cast_fp16 = matmul(transpose_x = var_5824_transpose_x_0, transpose_y = var_5824_transpose_y_0, x = var_5822_cast_fp16, y = v_269_cast_fp16)[name = string("op_5824_cast_fp16")];
+            tensor<int32, [4]> var_5825 = const()[name = string("op_5825"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_595x = const()[name = string("concat_595x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5826_cast_fp16 = transpose(perm = var_5825, x = var_5824_cast_fp16)[name = string("transpose_425")];
+            tensor<fp16, [1, ?, 1280]> x_481_cast_fp16 = reshape(shape = concat_595x, x = var_5826_cast_fp16)[name = string("x_481_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5830_to_fp16 = const()[name = string("op_5830_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348426048)))];
+            tensor<fp16, [1280]> var_5831_to_fp16 = const()[name = string("op_5831_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351702912)))];
+            tensor<fp16, [1, ?, 1280]> linear_213_cast_fp16 = linear(bias = var_5831_to_fp16, weight = var_5830_to_fp16, x = x_481_cast_fp16)[name = string("linear_213_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_483_cast_fp16 = add(x = x_477_cast_fp16, y = linear_213_cast_fp16)[name = string("x_483_cast_fp16")];
+            tensor<int32, [1]> var_5838_axes_0 = const()[name = string("op_5838_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_mlp_ln_weight_to_fp16 = const()[name = string("blocks_26_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351705536)))];
+            tensor<fp16, [1280]> blocks_26_mlp_ln_bias_to_fp16 = const()[name = string("blocks_26_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351708160)))];
+            tensor<fp16, [1, ?, 1280]> var_5838_cast_fp16 = layer_norm(axes = var_5838_axes_0, beta = blocks_26_mlp_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_mlp_ln_weight_to_fp16, x = x_483_cast_fp16)[name = string("op_5838_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5847_to_fp16 = const()[name = string("op_5847_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351710784)))];
+            tensor<fp16, [5120]> var_5848_to_fp16 = const()[name = string("op_5848_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364818048)))];
+            tensor<fp16, [1, ?, 5120]> linear_214_cast_fp16 = linear(bias = var_5848_to_fp16, weight = var_5847_to_fp16, x = var_5838_cast_fp16)[name = string("linear_214_cast_fp16")];
+            string x_487_mode_0 = const()[name = string("x_487_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_487_cast_fp16 = gelu(mode = x_487_mode_0, x = linear_214_cast_fp16)[name = string("x_487_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5853_to_fp16 = const()[name = string("op_5853_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364828352)))];
+            tensor<fp16, [1280]> var_5854_to_fp16 = const()[name = string("op_5854_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377935616)))];
+            tensor<fp16, [1, ?, 1280]> linear_215_cast_fp16 = linear(bias = var_5854_to_fp16, weight = var_5853_to_fp16, x = x_487_cast_fp16)[name = string("linear_215_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_489_cast_fp16 = add(x = x_483_cast_fp16, y = linear_215_cast_fp16)[name = string("x_489_cast_fp16")];
+            tensor<int32, [4]> k_cache_109_begin_0 = const()[name = string("k_cache_109_begin_0"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_109_end_0 = const()[name = string("k_cache_109_end_0"), val = tensor<int32, [4]>([28, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_109_end_mask_0 = const()[name = string("k_cache_109_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_109_squeeze_mask_0 = const()[name = string("k_cache_109_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_109_cast_fp16 = slice_by_index(begin = k_cache_109_begin_0, end = k_cache_109_end_0, end_mask = k_cache_109_end_mask_0, squeeze_mask = k_cache_109_squeeze_mask_0, x = coreml_update_state_116)[name = string("k_cache_109_cast_fp16")];
+            tensor<int32, [4]> v_cache_109_begin_0 = const()[name = string("v_cache_109_begin_0"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_109_end_0 = const()[name = string("v_cache_109_end_0"), val = tensor<int32, [4]>([28, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_109_end_mask_0 = const()[name = string("v_cache_109_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_109_squeeze_mask_0 = const()[name = string("v_cache_109_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_109_cast_fp16 = slice_by_index(begin = v_cache_109_begin_0, end = v_cache_109_end_0, end_mask = v_cache_109_end_mask_0, squeeze_mask = v_cache_109_squeeze_mask_0, x = coreml_update_state_117)[name = string("v_cache_109_cast_fp16")];
+            tensor<int32, [4]> k_cache_111_begin_0 = const()[name = string("k_cache_111_begin_0"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_111_end_0 = const()[name = string("k_cache_111_end_0"), val = tensor<int32, [4]>([28, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_111_end_mask_0 = const()[name = string("k_cache_111_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_111_squeeze_mask_0 = const()[name = string("k_cache_111_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_111_cast_fp16 = slice_by_index(begin = k_cache_111_begin_0, end = k_cache_111_end_0, end_mask = k_cache_111_end_mask_0, squeeze_mask = k_cache_111_squeeze_mask_0, x = read_state_2)[name = string("k_cache_111_cast_fp16")];
+            tensor<int32, [4]> v_cache_111_begin_0 = const()[name = string("v_cache_111_begin_0"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_111_end_0 = const()[name = string("v_cache_111_end_0"), val = tensor<int32, [4]>([28, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_111_end_mask_0 = const()[name = string("v_cache_111_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_111_squeeze_mask_0 = const()[name = string("v_cache_111_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_111_cast_fp16 = slice_by_index(begin = v_cache_111_begin_0, end = v_cache_111_end_0, end_mask = v_cache_111_end_mask_0, squeeze_mask = v_cache_111_squeeze_mask_0, x = read_state_3)[name = string("v_cache_111_cast_fp16")];
+            int32 var_5877 = const()[name = string("op_5877"), val = int32(-1)];
+            tensor<int32, [1]> var_5895_axes_0 = const()[name = string("op_5895_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377938240)))];
+            tensor<fp16, [1280]> blocks_27_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377940864)))];
+            fp16 var_5883_to_fp16 = const()[name = string("op_5883_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5895_cast_fp16 = layer_norm(axes = var_5895_axes_0, beta = blocks_27_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_attn_ln_weight_to_fp16, x = x_489_cast_fp16)[name = string("op_5895_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5906_to_fp16 = const()[name = string("op_5906_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377943488)))];
+            tensor<fp16, [1280]> var_5907_to_fp16 = const()[name = string("op_5907_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381220352)))];
+            tensor<fp16, [1, ?, 1280]> linear_216_cast_fp16 = linear(bias = var_5907_to_fp16, weight = var_5906_to_fp16, x = var_5895_cast_fp16)[name = string("linear_216_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5910_to_fp16 = const()[name = string("op_5910_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381222976)))];
+            tensor<fp16, [1, ?, 1280]> linear_217_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5910_to_fp16, x = var_5895_cast_fp16)[name = string("linear_217_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5914_to_fp16 = const()[name = string("op_5914_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1384499840)))];
+            tensor<fp16, [1280]> var_5915_to_fp16 = const()[name = string("op_5915_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387776704)))];
+            tensor<fp16, [1, ?, 1280]> linear_218_cast_fp16 = linear(bias = var_5915_to_fp16, weight = var_5914_to_fp16, x = var_5895_cast_fp16)[name = string("linear_218_cast_fp16")];
+            tensor<int32, [3]> var_5917_shape_cast_fp16 = shape(x = linear_216_cast_fp16)[name = string("op_5917_shape_cast_fp16")];
+            int32 gather_326_axis_0 = const()[name = string("gather_326_axis_0"), val = int32(0)];
+            int32 gather_326_batch_dims_0 = const()[name = string("gather_326_batch_dims_0"), val = int32(0)];
+            bool gather_326_validate_indices_0 = const()[name = string("gather_326_validate_indices_0"), val = bool(false)];
+            string var_5917_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5917_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_326_to_uint16 = const()[name = string("select_326_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5917_shape_cast_fp16_to_uint16 = cast(dtype = var_5917_shape_cast_fp16_to_uint16_dtype_0, x = var_5917_shape_cast_fp16)[name = string("cast_336")];
+            uint16 gather_326_cast_uint16 = gather(axis = gather_326_axis_0, batch_dims = gather_326_batch_dims_0, indices = select_326_to_uint16, validate_indices = gather_326_validate_indices_0, x = var_5917_shape_cast_fp16_to_uint16)[name = string("gather_326_cast_uint16")];
+            string gather_326_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_326_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_326_cast_uint16_to_int32 = cast(dtype = gather_326_cast_uint16_to_int32_dtype_0, x = gather_326_cast_uint16)[name = string("cast_335")];
+            int32 end_step_57 = add(x = offset, y = gather_326_cast_uint16_to_int32)[name = string("end_step_57")];
+            tensor<int32, [1]> expand_dims_432 = const()[name = string("expand_dims_432"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_434 = const()[name = string("expand_dims_434"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_435_axes_0 = const()[name = string("expand_dims_435_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_435 = expand_dims(axes = expand_dims_435_axes_0, x = end_step_57)[name = string("expand_dims_435")];
+            tensor<int32, [1]> concat_598_values0_0 = const()[name = string("concat_598_values0_0"), val = tensor<int32, [1]>([27])];
+            int32 concat_598_axis_0 = const()[name = string("concat_598_axis_0"), val = int32(0)];
+            bool concat_598_interleave_0 = const()[name = string("concat_598_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_598 = concat(axis = concat_598_axis_0, interleave = concat_598_interleave_0, values = (concat_598_values0_0, expand_dims_432, expand_dims_1, expand_dims_434))[name = string("concat_598")];
+            tensor<int32, [1]> concat_599_values0_0 = const()[name = string("concat_599_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_599_values1_0 = const()[name = string("concat_599_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_599_values3_0 = const()[name = string("concat_599_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_599_axis_0 = const()[name = string("concat_599_axis_0"), val = int32(0)];
+            bool concat_599_interleave_0 = const()[name = string("concat_599_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_599 = concat(axis = concat_599_axis_0, interleave = concat_599_interleave_0, values = (concat_599_values0_0, concat_599_values1_0, expand_dims_435, concat_599_values3_0))[name = string("concat_599")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_28_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = k_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = k_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_28_stride_0, update = linear_217_cast_fp16, x = coreml_update_state_116)[name = string("k_cache1_internal_tensor_assign_28_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_28_cast_fp16, input = k_cache1)[name = string("coreml_update_state_118_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_118 = read_state(input = k_cache1)[name = string("coreml_update_state_118")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_28_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = v_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = v_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_28_stride_0, update = linear_218_cast_fp16, x = coreml_update_state_117)[name = string("v_cache1_internal_tensor_assign_28_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_28_cast_fp16, input = v_cache1)[name = string("coreml_update_state_119_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_119 = read_state(input = v_cache1)[name = string("coreml_update_state_119")];
+            int32 concat_604_values0_0 = const()[name = string("concat_604_values0_0"), val = int32(1)];
+            int32 concat_604_values2_0 = const()[name = string("concat_604_values2_0"), val = int32(1280)];
+            int32 concat_604_axis_0 = const()[name = string("concat_604_axis_0"), val = int32(0)];
+            bool concat_604_interleave_0 = const()[name = string("concat_604_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_604 = concat(axis = concat_604_axis_0, interleave = concat_604_interleave_0, values = (concat_604_values0_0, end_step_57, concat_604_values2_0))[name = string("concat_604")];
+            tensor<int32, [3]> var_5933_begin_0 = const()[name = string("op_5933_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5933_end_mask_0 = const()[name = string("op_5933_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5933_cast_fp16 = slice_by_index(begin = var_5933_begin_0, end = concat_604, end_mask = var_5933_end_mask_0, x = k_cache_109_cast_fp16)[name = string("op_5933_cast_fp16")];
+            tensor<int32, [3]> var_5936_begin_0 = const()[name = string("op_5936_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5936_end_mask_0 = const()[name = string("op_5936_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5936_cast_fp16 = slice_by_index(begin = var_5936_begin_0, end = concat_604, end_mask = var_5936_end_mask_0, x = v_cache_109_cast_fp16)[name = string("op_5936_cast_fp16")];
+            tensor<int32, [4]> concat_606x = const()[name = string("concat_606x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5946_cast_fp16 = reshape(shape = concat_606x, x = linear_216_cast_fp16)[name = string("op_5946_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_219_cast_fp16 = mul(x = var_5946_cast_fp16, y = const_268_to_fp16)[name = string("q_219_cast_fp16")];
+            tensor<int32, [4]> concat_607x = const()[name = string("concat_607x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5953_cast_fp16 = reshape(shape = concat_607x, x = var_5933_cast_fp16)[name = string("op_5953_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_275_cast_fp16 = mul(x = var_5953_cast_fp16, y = const_269_to_fp16)[name = string("k_275_cast_fp16")];
+            tensor<int32, [4]> concat_608x = const()[name = string("concat_608x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5960_cast_fp16 = reshape(shape = concat_608x, x = var_5936_cast_fp16)[name = string("op_5960_cast_fp16")];
+            tensor<int32, [4]> var_5961 = const()[name = string("op_5961"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_163_transpose_x_0 = const()[name = string("qk_163_transpose_x_0"), val = bool(false)];
+            bool qk_163_transpose_y_0 = const()[name = string("qk_163_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_365_perm_0 = const()[name = string("transpose_365_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_366_perm_0 = const()[name = string("transpose_366_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_366 = transpose(perm = transpose_366_perm_0, x = k_275_cast_fp16)[name = string("transpose_422")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_365 = transpose(perm = transpose_365_perm_0, x = q_219_cast_fp16)[name = string("transpose_423")];
+            tensor<fp16, [1, 20, ?, ?]> qk_163_cast_fp16 = matmul(transpose_x = qk_163_transpose_x_0, transpose_y = qk_163_transpose_y_0, x = transpose_365, y = transpose_366)[name = string("qk_163_cast_fp16")];
+            int32 concat_609_values1_0 = const()[name = string("concat_609_values1_0"), val = int32(448)];
+            int32 concat_609_axis_0 = const()[name = string("concat_609_axis_0"), val = int32(0)];
+            bool concat_609_interleave_0 = const()[name = string("concat_609_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_609 = concat(axis = concat_609_axis_0, interleave = concat_609_interleave_0, values = (gather_326_cast_uint16_to_int32, concat_609_values1_0))[name = string("concat_609")];
+            tensor<int32, [2]> var_5964_begin_0 = const()[name = string("op_5964_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5964_end_mask_0 = const()[name = string("op_5964_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5964_cast_fp16 = slice_by_index(begin = var_5964_begin_0, end = concat_609, end_mask = var_5964_end_mask_0, x = mask_to_fp16)[name = string("op_5964_cast_fp16")];
+            int32 concat_610_values0_0 = const()[name = string("concat_610_values0_0"), val = int32(0)];
+            int32 concat_610_axis_0 = const()[name = string("concat_610_axis_0"), val = int32(0)];
+            bool concat_610_interleave_0 = const()[name = string("concat_610_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_610 = concat(axis = concat_610_axis_0, interleave = concat_610_interleave_0, values = (concat_610_values0_0, gather_326_cast_uint16_to_int32))[name = string("concat_610")];
+            tensor<int32, [2]> var_5965_begin_0 = const()[name = string("op_5965_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5965_end_mask_0 = const()[name = string("op_5965_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5965_cast_fp16 = slice_by_index(begin = var_5965_begin_0, end = concat_610, end_mask = var_5965_end_mask_0, x = var_5964_cast_fp16)[name = string("op_5965_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_165_cast_fp16 = add(x = qk_163_cast_fp16, y = var_5965_cast_fp16)[name = string("qk_165_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5968_cast_fp16 = softmax(axis = var_5877, x = qk_165_cast_fp16)[name = string("op_5968_cast_fp16")];
+            bool var_5970_transpose_x_0 = const()[name = string("op_5970_transpose_x_0"), val = bool(false)];
+            bool var_5970_transpose_y_0 = const()[name = string("op_5970_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_275_cast_fp16 = transpose(perm = var_5961, x = var_5960_cast_fp16)[name = string("transpose_424")];
+            tensor<fp16, [1, 20, ?, 64]> var_5970_cast_fp16 = matmul(transpose_x = var_5970_transpose_x_0, transpose_y = var_5970_transpose_y_0, x = var_5968_cast_fp16, y = v_275_cast_fp16)[name = string("op_5970_cast_fp16")];
+            tensor<int32, [4]> var_5971 = const()[name = string("op_5971"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_611x = const()[name = string("concat_611x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5972_cast_fp16 = transpose(perm = var_5971, x = var_5970_cast_fp16)[name = string("transpose_421")];
+            tensor<fp16, [1, ?, 1280]> x_493_cast_fp16 = reshape(shape = concat_611x, x = var_5972_cast_fp16)[name = string("x_493_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5976_to_fp16 = const()[name = string("op_5976_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387779328)))];
+            tensor<fp16, [1280]> var_5977_to_fp16 = const()[name = string("op_5977_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391056192)))];
+            tensor<fp16, [1, ?, 1280]> linear_219_cast_fp16 = linear(bias = var_5977_to_fp16, weight = var_5976_to_fp16, x = x_493_cast_fp16)[name = string("linear_219_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_495_cast_fp16 = add(x = x_489_cast_fp16, y = linear_219_cast_fp16)[name = string("x_495_cast_fp16")];
+            tensor<int32, [1]> var_5984_axes_0 = const()[name = string("op_5984_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391058816)))];
+            tensor<fp16, [1280]> blocks_27_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391061440)))];
+            tensor<fp16, [1, ?, 1280]> var_5984_cast_fp16 = layer_norm(axes = var_5984_axes_0, beta = blocks_27_cross_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_cross_attn_ln_weight_to_fp16, x = x_495_cast_fp16)[name = string("op_5984_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5993_to_fp16 = const()[name = string("op_5993_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391064064)))];
+            tensor<fp16, [1280]> var_5994_to_fp16 = const()[name = string("op_5994_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394340928)))];
+            tensor<fp16, [1, ?, 1280]> linear_220_cast_fp16 = linear(bias = var_5994_to_fp16, weight = var_5993_to_fp16, x = var_5984_cast_fp16)[name = string("linear_220_cast_fp16")];
+            tensor<int32, [3]> concat_612 = const()[name = string("concat_612"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_613 = const()[name = string("concat_613"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_277_internal_tensor_assign_1_stride_0 = const()[name = string("k_277_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_612, begin_mask = k_277_internal_tensor_assign_1_begin_mask_0, end = concat_613, end_mask = k_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_277_internal_tensor_assign_1_squeeze_mask_0, stride = k_277_internal_tensor_assign_1_stride_0, update = k_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("k_277_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_614 = const()[name = string("concat_614"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_615 = const()[name = string("concat_615"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_277_internal_tensor_assign_1_stride_0 = const()[name = string("v_277_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_614, begin_mask = v_277_internal_tensor_assign_1_begin_mask_0, end = concat_615, end_mask = v_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_277_internal_tensor_assign_1_squeeze_mask_0, stride = v_277_internal_tensor_assign_1_stride_0, update = v_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("v_277_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_616x = const()[name = string("concat_616x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6014_cast_fp16 = reshape(shape = concat_616x, x = linear_220_cast_fp16)[name = string("op_6014_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_223_cast_fp16 = mul(x = var_6014_cast_fp16, y = const_270_to_fp16)[name = string("q_223_cast_fp16")];
+            tensor<int32, [4]> var_6020 = const()[name = string("op_6020"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6021_cast_fp16 = reshape(shape = var_6020, x = k_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6021_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_279_cast_fp16 = mul(x = var_6021_cast_fp16, y = const_271_to_fp16)[name = string("k_279_cast_fp16")];
+            tensor<int32, [4]> var_6027 = const()[name = string("op_6027"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6028_cast_fp16 = reshape(shape = var_6027, x = v_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6028_cast_fp16")];
+            tensor<int32, [4]> var_6029 = const()[name = string("op_6029"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_167_transpose_x_0 = const()[name = string("qk_167_transpose_x_0"), val = bool(false)];
+            bool qk_167_transpose_y_0 = const()[name = string("qk_167_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_367_perm_0 = const()[name = string("transpose_367_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_368_perm_0 = const()[name = string("transpose_368_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_368 = transpose(perm = transpose_368_perm_0, x = k_279_cast_fp16)[name = string("transpose_418")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_367 = transpose(perm = transpose_367_perm_0, x = q_223_cast_fp16)[name = string("transpose_419")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_167_cast_fp16 = matmul(transpose_x = qk_167_transpose_x_0, transpose_y = qk_167_transpose_y_0, x = transpose_367, y = transpose_368)[name = string("qk_167_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6033_cast_fp16 = softmax(axis = var_5877, x = qk_167_cast_fp16)[name = string("op_6033_cast_fp16")];
+            bool var_6035_transpose_x_0 = const()[name = string("op_6035_transpose_x_0"), val = bool(false)];
+            bool var_6035_transpose_y_0 = const()[name = string("op_6035_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_279_cast_fp16 = transpose(perm = var_6029, x = var_6028_cast_fp16)[name = string("transpose_420")];
+            tensor<fp16, [1, 20, ?, 64]> var_6035_cast_fp16 = matmul(transpose_x = var_6035_transpose_x_0, transpose_y = var_6035_transpose_y_0, x = var_6033_cast_fp16, y = v_279_cast_fp16)[name = string("op_6035_cast_fp16")];
+            tensor<int32, [4]> var_6036 = const()[name = string("op_6036"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_617x = const()[name = string("concat_617x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6037_cast_fp16 = transpose(perm = var_6036, x = var_6035_cast_fp16)[name = string("transpose_417")];
+            tensor<fp16, [1, ?, 1280]> x_499_cast_fp16 = reshape(shape = concat_617x, x = var_6037_cast_fp16)[name = string("x_499_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6041_to_fp16 = const()[name = string("op_6041_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394343552)))];
+            tensor<fp16, [1280]> var_6042_to_fp16 = const()[name = string("op_6042_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397620416)))];
+            tensor<fp16, [1, ?, 1280]> linear_221_cast_fp16 = linear(bias = var_6042_to_fp16, weight = var_6041_to_fp16, x = x_499_cast_fp16)[name = string("linear_221_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_501_cast_fp16 = add(x = x_495_cast_fp16, y = linear_221_cast_fp16)[name = string("x_501_cast_fp16")];
+            tensor<int32, [1]> var_6049_axes_0 = const()[name = string("op_6049_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_mlp_ln_weight_to_fp16 = const()[name = string("blocks_27_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397623040)))];
+            tensor<fp16, [1280]> blocks_27_mlp_ln_bias_to_fp16 = const()[name = string("blocks_27_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397625664)))];
+            tensor<fp16, [1, ?, 1280]> var_6049_cast_fp16 = layer_norm(axes = var_6049_axes_0, beta = blocks_27_mlp_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_mlp_ln_weight_to_fp16, x = x_501_cast_fp16)[name = string("op_6049_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6058_to_fp16 = const()[name = string("op_6058_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397628288)))];
+            tensor<fp16, [5120]> var_6059_to_fp16 = const()[name = string("op_6059_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410735552)))];
+            tensor<fp16, [1, ?, 5120]> linear_222_cast_fp16 = linear(bias = var_6059_to_fp16, weight = var_6058_to_fp16, x = var_6049_cast_fp16)[name = string("linear_222_cast_fp16")];
+            string x_505_mode_0 = const()[name = string("x_505_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_505_cast_fp16 = gelu(mode = x_505_mode_0, x = linear_222_cast_fp16)[name = string("x_505_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6064_to_fp16 = const()[name = string("op_6064_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410745856)))];
+            tensor<fp16, [1280]> var_6065_to_fp16 = const()[name = string("op_6065_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423853120)))];
+            tensor<fp16, [1, ?, 1280]> linear_223_cast_fp16 = linear(bias = var_6065_to_fp16, weight = var_6064_to_fp16, x = x_505_cast_fp16)[name = string("linear_223_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_507_cast_fp16 = add(x = x_501_cast_fp16, y = linear_223_cast_fp16)[name = string("x_507_cast_fp16")];
+            tensor<int32, [4]> k_cache_113_begin_0 = const()[name = string("k_cache_113_begin_0"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_113_end_0 = const()[name = string("k_cache_113_end_0"), val = tensor<int32, [4]>([29, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_113_end_mask_0 = const()[name = string("k_cache_113_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_113_squeeze_mask_0 = const()[name = string("k_cache_113_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_113_cast_fp16 = slice_by_index(begin = k_cache_113_begin_0, end = k_cache_113_end_0, end_mask = k_cache_113_end_mask_0, squeeze_mask = k_cache_113_squeeze_mask_0, x = coreml_update_state_118)[name = string("k_cache_113_cast_fp16")];
+            tensor<int32, [4]> v_cache_113_begin_0 = const()[name = string("v_cache_113_begin_0"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_113_end_0 = const()[name = string("v_cache_113_end_0"), val = tensor<int32, [4]>([29, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_113_end_mask_0 = const()[name = string("v_cache_113_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_113_squeeze_mask_0 = const()[name = string("v_cache_113_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_113_cast_fp16 = slice_by_index(begin = v_cache_113_begin_0, end = v_cache_113_end_0, end_mask = v_cache_113_end_mask_0, squeeze_mask = v_cache_113_squeeze_mask_0, x = coreml_update_state_119)[name = string("v_cache_113_cast_fp16")];
+            tensor<int32, [4]> k_cache_115_begin_0 = const()[name = string("k_cache_115_begin_0"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_115_end_0 = const()[name = string("k_cache_115_end_0"), val = tensor<int32, [4]>([29, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_115_end_mask_0 = const()[name = string("k_cache_115_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_115_squeeze_mask_0 = const()[name = string("k_cache_115_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_115_cast_fp16 = slice_by_index(begin = k_cache_115_begin_0, end = k_cache_115_end_0, end_mask = k_cache_115_end_mask_0, squeeze_mask = k_cache_115_squeeze_mask_0, x = read_state_2)[name = string("k_cache_115_cast_fp16")];
+            tensor<int32, [4]> v_cache_115_begin_0 = const()[name = string("v_cache_115_begin_0"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_115_end_0 = const()[name = string("v_cache_115_end_0"), val = tensor<int32, [4]>([29, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_115_end_mask_0 = const()[name = string("v_cache_115_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_115_squeeze_mask_0 = const()[name = string("v_cache_115_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_115_cast_fp16 = slice_by_index(begin = v_cache_115_begin_0, end = v_cache_115_end_0, end_mask = v_cache_115_end_mask_0, squeeze_mask = v_cache_115_squeeze_mask_0, x = read_state_3)[name = string("v_cache_115_cast_fp16")];
+            int32 var_6088 = const()[name = string("op_6088"), val = int32(-1)];
+            tensor<int32, [1]> var_6106_axes_0 = const()[name = string("op_6106_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423855744)))];
+            tensor<fp16, [1280]> blocks_28_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423858368)))];
+            fp16 var_6094_to_fp16 = const()[name = string("op_6094_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6106_cast_fp16 = layer_norm(axes = var_6106_axes_0, beta = blocks_28_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_attn_ln_weight_to_fp16, x = x_507_cast_fp16)[name = string("op_6106_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6117_to_fp16 = const()[name = string("op_6117_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423860992)))];
+            tensor<fp16, [1280]> var_6118_to_fp16 = const()[name = string("op_6118_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427137856)))];
+            tensor<fp16, [1, ?, 1280]> linear_224_cast_fp16 = linear(bias = var_6118_to_fp16, weight = var_6117_to_fp16, x = var_6106_cast_fp16)[name = string("linear_224_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6121_to_fp16 = const()[name = string("op_6121_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427140480)))];
+            tensor<fp16, [1, ?, 1280]> linear_225_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6121_to_fp16, x = var_6106_cast_fp16)[name = string("linear_225_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6125_to_fp16 = const()[name = string("op_6125_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1430417344)))];
+            tensor<fp16, [1280]> var_6126_to_fp16 = const()[name = string("op_6126_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433694208)))];
+            tensor<fp16, [1, ?, 1280]> linear_226_cast_fp16 = linear(bias = var_6126_to_fp16, weight = var_6125_to_fp16, x = var_6106_cast_fp16)[name = string("linear_226_cast_fp16")];
+            tensor<int32, [3]> var_6128_shape_cast_fp16 = shape(x = linear_224_cast_fp16)[name = string("op_6128_shape_cast_fp16")];
+            int32 gather_338_axis_0 = const()[name = string("gather_338_axis_0"), val = int32(0)];
+            int32 gather_338_batch_dims_0 = const()[name = string("gather_338_batch_dims_0"), val = int32(0)];
+            bool gather_338_validate_indices_0 = const()[name = string("gather_338_validate_indices_0"), val = bool(false)];
+            string var_6128_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6128_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_338_to_uint16 = const()[name = string("select_338_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_6128_shape_cast_fp16_to_uint16 = cast(dtype = var_6128_shape_cast_fp16_to_uint16_dtype_0, x = var_6128_shape_cast_fp16)[name = string("cast_334")];
+            uint16 gather_338_cast_uint16 = gather(axis = gather_338_axis_0, batch_dims = gather_338_batch_dims_0, indices = select_338_to_uint16, validate_indices = gather_338_validate_indices_0, x = var_6128_shape_cast_fp16_to_uint16)[name = string("gather_338_cast_uint16")];
+            string gather_338_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_338_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_338_cast_uint16_to_int32 = cast(dtype = gather_338_cast_uint16_to_int32_dtype_0, x = gather_338_cast_uint16)[name = string("cast_333")];
+            int32 end_step_59 = add(x = offset, y = gather_338_cast_uint16_to_int32)[name = string("end_step_59")];
+            tensor<int32, [1]> expand_dims_448 = const()[name = string("expand_dims_448"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_450 = const()[name = string("expand_dims_450"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_451_axes_0 = const()[name = string("expand_dims_451_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_451 = expand_dims(axes = expand_dims_451_axes_0, x = end_step_59)[name = string("expand_dims_451")];
+            tensor<int32, [1]> concat_620_values0_0 = const()[name = string("concat_620_values0_0"), val = tensor<int32, [1]>([28])];
+            int32 concat_620_axis_0 = const()[name = string("concat_620_axis_0"), val = int32(0)];
+            bool concat_620_interleave_0 = const()[name = string("concat_620_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_620 = concat(axis = concat_620_axis_0, interleave = concat_620_interleave_0, values = (concat_620_values0_0, expand_dims_448, expand_dims_1, expand_dims_450))[name = string("concat_620")];
+            tensor<int32, [1]> concat_621_values0_0 = const()[name = string("concat_621_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_621_values1_0 = const()[name = string("concat_621_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_621_values3_0 = const()[name = string("concat_621_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_621_axis_0 = const()[name = string("concat_621_axis_0"), val = int32(0)];
+            bool concat_621_interleave_0 = const()[name = string("concat_621_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_621 = concat(axis = concat_621_axis_0, interleave = concat_621_interleave_0, values = (concat_621_values0_0, concat_621_values1_0, expand_dims_451, concat_621_values3_0))[name = string("concat_621")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_29_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = k_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = k_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_29_stride_0, update = linear_225_cast_fp16, x = coreml_update_state_118)[name = string("k_cache1_internal_tensor_assign_29_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_29_cast_fp16, input = k_cache1)[name = string("coreml_update_state_120_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_120 = read_state(input = k_cache1)[name = string("coreml_update_state_120")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_29_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = v_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = v_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_29_stride_0, update = linear_226_cast_fp16, x = coreml_update_state_119)[name = string("v_cache1_internal_tensor_assign_29_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_29_cast_fp16, input = v_cache1)[name = string("coreml_update_state_121_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_121 = read_state(input = v_cache1)[name = string("coreml_update_state_121")];
+            int32 concat_626_values0_0 = const()[name = string("concat_626_values0_0"), val = int32(1)];
+            int32 concat_626_values2_0 = const()[name = string("concat_626_values2_0"), val = int32(1280)];
+            int32 concat_626_axis_0 = const()[name = string("concat_626_axis_0"), val = int32(0)];
+            bool concat_626_interleave_0 = const()[name = string("concat_626_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_626 = concat(axis = concat_626_axis_0, interleave = concat_626_interleave_0, values = (concat_626_values0_0, end_step_59, concat_626_values2_0))[name = string("concat_626")];
+            tensor<int32, [3]> var_6144_begin_0 = const()[name = string("op_6144_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6144_end_mask_0 = const()[name = string("op_6144_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6144_cast_fp16 = slice_by_index(begin = var_6144_begin_0, end = concat_626, end_mask = var_6144_end_mask_0, x = k_cache_113_cast_fp16)[name = string("op_6144_cast_fp16")];
+            tensor<int32, [3]> var_6147_begin_0 = const()[name = string("op_6147_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6147_end_mask_0 = const()[name = string("op_6147_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6147_cast_fp16 = slice_by_index(begin = var_6147_begin_0, end = concat_626, end_mask = var_6147_end_mask_0, x = v_cache_113_cast_fp16)[name = string("op_6147_cast_fp16")];
+            tensor<int32, [4]> concat_628x = const()[name = string("concat_628x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6157_cast_fp16 = reshape(shape = concat_628x, x = linear_224_cast_fp16)[name = string("op_6157_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_227_cast_fp16 = mul(x = var_6157_cast_fp16, y = const_272_to_fp16)[name = string("q_227_cast_fp16")];
+            tensor<int32, [4]> concat_629x = const()[name = string("concat_629x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6164_cast_fp16 = reshape(shape = concat_629x, x = var_6144_cast_fp16)[name = string("op_6164_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_285_cast_fp16 = mul(x = var_6164_cast_fp16, y = const_273_to_fp16)[name = string("k_285_cast_fp16")];
+            tensor<int32, [4]> concat_630x = const()[name = string("concat_630x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6171_cast_fp16 = reshape(shape = concat_630x, x = var_6147_cast_fp16)[name = string("op_6171_cast_fp16")];
+            tensor<int32, [4]> var_6172 = const()[name = string("op_6172"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_169_transpose_x_0 = const()[name = string("qk_169_transpose_x_0"), val = bool(false)];
+            bool qk_169_transpose_y_0 = const()[name = string("qk_169_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_369_perm_0 = const()[name = string("transpose_369_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_370_perm_0 = const()[name = string("transpose_370_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_370 = transpose(perm = transpose_370_perm_0, x = k_285_cast_fp16)[name = string("transpose_414")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_369 = transpose(perm = transpose_369_perm_0, x = q_227_cast_fp16)[name = string("transpose_415")];
+            tensor<fp16, [1, 20, ?, ?]> qk_169_cast_fp16 = matmul(transpose_x = qk_169_transpose_x_0, transpose_y = qk_169_transpose_y_0, x = transpose_369, y = transpose_370)[name = string("qk_169_cast_fp16")];
+            int32 concat_631_values1_0 = const()[name = string("concat_631_values1_0"), val = int32(448)];
+            int32 concat_631_axis_0 = const()[name = string("concat_631_axis_0"), val = int32(0)];
+            bool concat_631_interleave_0 = const()[name = string("concat_631_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_631 = concat(axis = concat_631_axis_0, interleave = concat_631_interleave_0, values = (gather_338_cast_uint16_to_int32, concat_631_values1_0))[name = string("concat_631")];
+            tensor<int32, [2]> var_6175_begin_0 = const()[name = string("op_6175_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6175_end_mask_0 = const()[name = string("op_6175_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_6175_cast_fp16 = slice_by_index(begin = var_6175_begin_0, end = concat_631, end_mask = var_6175_end_mask_0, x = mask_to_fp16)[name = string("op_6175_cast_fp16")];
+            int32 concat_632_values0_0 = const()[name = string("concat_632_values0_0"), val = int32(0)];
+            int32 concat_632_axis_0 = const()[name = string("concat_632_axis_0"), val = int32(0)];
+            bool concat_632_interleave_0 = const()[name = string("concat_632_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_632 = concat(axis = concat_632_axis_0, interleave = concat_632_interleave_0, values = (concat_632_values0_0, gather_338_cast_uint16_to_int32))[name = string("concat_632")];
+            tensor<int32, [2]> var_6176_begin_0 = const()[name = string("op_6176_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6176_end_mask_0 = const()[name = string("op_6176_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_6176_cast_fp16 = slice_by_index(begin = var_6176_begin_0, end = concat_632, end_mask = var_6176_end_mask_0, x = var_6175_cast_fp16)[name = string("op_6176_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_171_cast_fp16 = add(x = qk_169_cast_fp16, y = var_6176_cast_fp16)[name = string("qk_171_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_6179_cast_fp16 = softmax(axis = var_6088, x = qk_171_cast_fp16)[name = string("op_6179_cast_fp16")];
+            bool var_6181_transpose_x_0 = const()[name = string("op_6181_transpose_x_0"), val = bool(false)];
+            bool var_6181_transpose_y_0 = const()[name = string("op_6181_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_285_cast_fp16 = transpose(perm = var_6172, x = var_6171_cast_fp16)[name = string("transpose_416")];
+            tensor<fp16, [1, 20, ?, 64]> var_6181_cast_fp16 = matmul(transpose_x = var_6181_transpose_x_0, transpose_y = var_6181_transpose_y_0, x = var_6179_cast_fp16, y = v_285_cast_fp16)[name = string("op_6181_cast_fp16")];
+            tensor<int32, [4]> var_6182 = const()[name = string("op_6182"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_633x = const()[name = string("concat_633x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6183_cast_fp16 = transpose(perm = var_6182, x = var_6181_cast_fp16)[name = string("transpose_413")];
+            tensor<fp16, [1, ?, 1280]> x_511_cast_fp16 = reshape(shape = concat_633x, x = var_6183_cast_fp16)[name = string("x_511_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6187_to_fp16 = const()[name = string("op_6187_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433696832)))];
+            tensor<fp16, [1280]> var_6188_to_fp16 = const()[name = string("op_6188_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436973696)))];
+            tensor<fp16, [1, ?, 1280]> linear_227_cast_fp16 = linear(bias = var_6188_to_fp16, weight = var_6187_to_fp16, x = x_511_cast_fp16)[name = string("linear_227_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_513_cast_fp16 = add(x = x_507_cast_fp16, y = linear_227_cast_fp16)[name = string("x_513_cast_fp16")];
+            tensor<int32, [1]> var_6195_axes_0 = const()[name = string("op_6195_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436976320)))];
+            tensor<fp16, [1280]> blocks_28_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436978944)))];
+            tensor<fp16, [1, ?, 1280]> var_6195_cast_fp16 = layer_norm(axes = var_6195_axes_0, beta = blocks_28_cross_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_cross_attn_ln_weight_to_fp16, x = x_513_cast_fp16)[name = string("op_6195_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6204_to_fp16 = const()[name = string("op_6204_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436981568)))];
+            tensor<fp16, [1280]> var_6205_to_fp16 = const()[name = string("op_6205_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440258432)))];
+            tensor<fp16, [1, ?, 1280]> linear_228_cast_fp16 = linear(bias = var_6205_to_fp16, weight = var_6204_to_fp16, x = var_6195_cast_fp16)[name = string("linear_228_cast_fp16")];
+            tensor<int32, [3]> concat_634 = const()[name = string("concat_634"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_635 = const()[name = string("concat_635"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_287_internal_tensor_assign_1_stride_0 = const()[name = string("k_287_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_634, begin_mask = k_287_internal_tensor_assign_1_begin_mask_0, end = concat_635, end_mask = k_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_287_internal_tensor_assign_1_squeeze_mask_0, stride = k_287_internal_tensor_assign_1_stride_0, update = k_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("k_287_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_636 = const()[name = string("concat_636"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_637 = const()[name = string("concat_637"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_287_internal_tensor_assign_1_stride_0 = const()[name = string("v_287_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_636, begin_mask = v_287_internal_tensor_assign_1_begin_mask_0, end = concat_637, end_mask = v_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_287_internal_tensor_assign_1_squeeze_mask_0, stride = v_287_internal_tensor_assign_1_stride_0, update = v_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("v_287_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_638x = const()[name = string("concat_638x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6225_cast_fp16 = reshape(shape = concat_638x, x = linear_228_cast_fp16)[name = string("op_6225_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_231_cast_fp16 = mul(x = var_6225_cast_fp16, y = const_274_to_fp16)[name = string("q_231_cast_fp16")];
+            tensor<int32, [4]> var_6231 = const()[name = string("op_6231"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6232_cast_fp16 = reshape(shape = var_6231, x = k_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6232_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_275_to_fp16 = const()[name = string("const_275_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_289_cast_fp16 = mul(x = var_6232_cast_fp16, y = const_275_to_fp16)[name = string("k_289_cast_fp16")];
+            tensor<int32, [4]> var_6238 = const()[name = string("op_6238"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6239_cast_fp16 = reshape(shape = var_6238, x = v_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6239_cast_fp16")];
+            tensor<int32, [4]> var_6240 = const()[name = string("op_6240"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_173_transpose_x_0 = const()[name = string("qk_173_transpose_x_0"), val = bool(false)];
+            bool qk_173_transpose_y_0 = const()[name = string("qk_173_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_371_perm_0 = const()[name = string("transpose_371_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_372_perm_0 = const()[name = string("transpose_372_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_372 = transpose(perm = transpose_372_perm_0, x = k_289_cast_fp16)[name = string("transpose_410")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_371 = transpose(perm = transpose_371_perm_0, x = q_231_cast_fp16)[name = string("transpose_411")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_173_cast_fp16 = matmul(transpose_x = qk_173_transpose_x_0, transpose_y = qk_173_transpose_y_0, x = transpose_371, y = transpose_372)[name = string("qk_173_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6244_cast_fp16 = softmax(axis = var_6088, x = qk_173_cast_fp16)[name = string("op_6244_cast_fp16")];
+            bool var_6246_transpose_x_0 = const()[name = string("op_6246_transpose_x_0"), val = bool(false)];
+            bool var_6246_transpose_y_0 = const()[name = string("op_6246_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_289_cast_fp16 = transpose(perm = var_6240, x = var_6239_cast_fp16)[name = string("transpose_412")];
+            tensor<fp16, [1, 20, ?, 64]> var_6246_cast_fp16 = matmul(transpose_x = var_6246_transpose_x_0, transpose_y = var_6246_transpose_y_0, x = var_6244_cast_fp16, y = v_289_cast_fp16)[name = string("op_6246_cast_fp16")];
+            tensor<int32, [4]> var_6247 = const()[name = string("op_6247"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_639x = const()[name = string("concat_639x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6248_cast_fp16 = transpose(perm = var_6247, x = var_6246_cast_fp16)[name = string("transpose_409")];
+            tensor<fp16, [1, ?, 1280]> x_517_cast_fp16 = reshape(shape = concat_639x, x = var_6248_cast_fp16)[name = string("x_517_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6252_to_fp16 = const()[name = string("op_6252_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440261056)))];
+            tensor<fp16, [1280]> var_6253_to_fp16 = const()[name = string("op_6253_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443537920)))];
+            tensor<fp16, [1, ?, 1280]> linear_229_cast_fp16 = linear(bias = var_6253_to_fp16, weight = var_6252_to_fp16, x = x_517_cast_fp16)[name = string("linear_229_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_519_cast_fp16 = add(x = x_513_cast_fp16, y = linear_229_cast_fp16)[name = string("x_519_cast_fp16")];
+            tensor<int32, [1]> var_6260_axes_0 = const()[name = string("op_6260_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_mlp_ln_weight_to_fp16 = const()[name = string("blocks_28_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443540544)))];
+            tensor<fp16, [1280]> blocks_28_mlp_ln_bias_to_fp16 = const()[name = string("blocks_28_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443543168)))];
+            tensor<fp16, [1, ?, 1280]> var_6260_cast_fp16 = layer_norm(axes = var_6260_axes_0, beta = blocks_28_mlp_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_mlp_ln_weight_to_fp16, x = x_519_cast_fp16)[name = string("op_6260_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6269_to_fp16 = const()[name = string("op_6269_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443545792)))];
+            tensor<fp16, [5120]> var_6270_to_fp16 = const()[name = string("op_6270_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456653056)))];
+            tensor<fp16, [1, ?, 5120]> linear_230_cast_fp16 = linear(bias = var_6270_to_fp16, weight = var_6269_to_fp16, x = var_6260_cast_fp16)[name = string("linear_230_cast_fp16")];
+            string x_523_mode_0 = const()[name = string("x_523_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_523_cast_fp16 = gelu(mode = x_523_mode_0, x = linear_230_cast_fp16)[name = string("x_523_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6275_to_fp16 = const()[name = string("op_6275_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456663360)))];
+            tensor<fp16, [1280]> var_6276_to_fp16 = const()[name = string("op_6276_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469770624)))];
+            tensor<fp16, [1, ?, 1280]> linear_231_cast_fp16 = linear(bias = var_6276_to_fp16, weight = var_6275_to_fp16, x = x_523_cast_fp16)[name = string("linear_231_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_525_cast_fp16 = add(x = x_519_cast_fp16, y = linear_231_cast_fp16)[name = string("x_525_cast_fp16")];
+            tensor<int32, [4]> k_cache_117_begin_0 = const()[name = string("k_cache_117_begin_0"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_117_end_0 = const()[name = string("k_cache_117_end_0"), val = tensor<int32, [4]>([30, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_117_end_mask_0 = const()[name = string("k_cache_117_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_117_squeeze_mask_0 = const()[name = string("k_cache_117_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_117_cast_fp16 = slice_by_index(begin = k_cache_117_begin_0, end = k_cache_117_end_0, end_mask = k_cache_117_end_mask_0, squeeze_mask = k_cache_117_squeeze_mask_0, x = coreml_update_state_120)[name = string("k_cache_117_cast_fp16")];
+            tensor<int32, [4]> v_cache_117_begin_0 = const()[name = string("v_cache_117_begin_0"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_117_end_0 = const()[name = string("v_cache_117_end_0"), val = tensor<int32, [4]>([30, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_117_end_mask_0 = const()[name = string("v_cache_117_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_117_squeeze_mask_0 = const()[name = string("v_cache_117_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_117_cast_fp16 = slice_by_index(begin = v_cache_117_begin_0, end = v_cache_117_end_0, end_mask = v_cache_117_end_mask_0, squeeze_mask = v_cache_117_squeeze_mask_0, x = coreml_update_state_121)[name = string("v_cache_117_cast_fp16")];
+            tensor<int32, [4]> k_cache_119_begin_0 = const()[name = string("k_cache_119_begin_0"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_119_end_0 = const()[name = string("k_cache_119_end_0"), val = tensor<int32, [4]>([30, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_119_end_mask_0 = const()[name = string("k_cache_119_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_119_squeeze_mask_0 = const()[name = string("k_cache_119_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_119_cast_fp16 = slice_by_index(begin = k_cache_119_begin_0, end = k_cache_119_end_0, end_mask = k_cache_119_end_mask_0, squeeze_mask = k_cache_119_squeeze_mask_0, x = read_state_2)[name = string("k_cache_119_cast_fp16")];
+            tensor<int32, [4]> v_cache_119_begin_0 = const()[name = string("v_cache_119_begin_0"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_119_end_0 = const()[name = string("v_cache_119_end_0"), val = tensor<int32, [4]>([30, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_119_end_mask_0 = const()[name = string("v_cache_119_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_119_squeeze_mask_0 = const()[name = string("v_cache_119_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_119_cast_fp16 = slice_by_index(begin = v_cache_119_begin_0, end = v_cache_119_end_0, end_mask = v_cache_119_end_mask_0, squeeze_mask = v_cache_119_squeeze_mask_0, x = read_state_3)[name = string("v_cache_119_cast_fp16")];
+            int32 var_6299 = const()[name = string("op_6299"), val = int32(-1)];
+            tensor<int32, [1]> var_6317_axes_0 = const()[name = string("op_6317_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469773248)))];
+            tensor<fp16, [1280]> blocks_29_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469775872)))];
+            fp16 var_6305_to_fp16 = const()[name = string("op_6305_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6317_cast_fp16 = layer_norm(axes = var_6317_axes_0, beta = blocks_29_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_attn_ln_weight_to_fp16, x = x_525_cast_fp16)[name = string("op_6317_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6328_to_fp16 = const()[name = string("op_6328_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469778496)))];
+            tensor<fp16, [1280]> var_6329_to_fp16 = const()[name = string("op_6329_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473055360)))];
+            tensor<fp16, [1, ?, 1280]> linear_232_cast_fp16 = linear(bias = var_6329_to_fp16, weight = var_6328_to_fp16, x = var_6317_cast_fp16)[name = string("linear_232_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473057984)))];
+            tensor<fp16, [1, ?, 1280]> linear_233_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6332_to_fp16, x = var_6317_cast_fp16)[name = string("linear_233_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6336_to_fp16 = const()[name = string("op_6336_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1476334848)))];
+            tensor<fp16, [1280]> var_6337_to_fp16 = const()[name = string("op_6337_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479611712)))];
+            tensor<fp16, [1, ?, 1280]> linear_234_cast_fp16 = linear(bias = var_6337_to_fp16, weight = var_6336_to_fp16, x = var_6317_cast_fp16)[name = string("linear_234_cast_fp16")];
+            tensor<int32, [3]> var_6339_shape_cast_fp16 = shape(x = linear_232_cast_fp16)[name = string("op_6339_shape_cast_fp16")];
+            int32 gather_350_axis_0 = const()[name = string("gather_350_axis_0"), val = int32(0)];
+            int32 gather_350_batch_dims_0 = const()[name = string("gather_350_batch_dims_0"), val = int32(0)];
+            bool gather_350_validate_indices_0 = const()[name = string("gather_350_validate_indices_0"), val = bool(false)];
+            string var_6339_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6339_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_350_to_uint16 = const()[name = string("select_350_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_6339_shape_cast_fp16_to_uint16 = cast(dtype = var_6339_shape_cast_fp16_to_uint16_dtype_0, x = var_6339_shape_cast_fp16)[name = string("cast_332")];
+            uint16 gather_350_cast_uint16 = gather(axis = gather_350_axis_0, batch_dims = gather_350_batch_dims_0, indices = select_350_to_uint16, validate_indices = gather_350_validate_indices_0, x = var_6339_shape_cast_fp16_to_uint16)[name = string("gather_350_cast_uint16")];
+            string gather_350_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_350_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_350_cast_uint16_to_int32 = cast(dtype = gather_350_cast_uint16_to_int32_dtype_0, x = gather_350_cast_uint16)[name = string("cast_331")];
+            int32 end_step_61 = add(x = offset, y = gather_350_cast_uint16_to_int32)[name = string("end_step_61")];
+            tensor<int32, [1]> expand_dims_464 = const()[name = string("expand_dims_464"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_466 = const()[name = string("expand_dims_466"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_467_axes_0 = const()[name = string("expand_dims_467_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_467 = expand_dims(axes = expand_dims_467_axes_0, x = end_step_61)[name = string("expand_dims_467")];
+            tensor<int32, [1]> concat_642_values0_0 = const()[name = string("concat_642_values0_0"), val = tensor<int32, [1]>([29])];
+            int32 concat_642_axis_0 = const()[name = string("concat_642_axis_0"), val = int32(0)];
+            bool concat_642_interleave_0 = const()[name = string("concat_642_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_642 = concat(axis = concat_642_axis_0, interleave = concat_642_interleave_0, values = (concat_642_values0_0, expand_dims_464, expand_dims_1, expand_dims_466))[name = string("concat_642")];
+            tensor<int32, [1]> concat_643_values0_0 = const()[name = string("concat_643_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_643_values1_0 = const()[name = string("concat_643_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_643_values3_0 = const()[name = string("concat_643_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_643_axis_0 = const()[name = string("concat_643_axis_0"), val = int32(0)];
+            bool concat_643_interleave_0 = const()[name = string("concat_643_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_643 = concat(axis = concat_643_axis_0, interleave = concat_643_interleave_0, values = (concat_643_values0_0, concat_643_values1_0, expand_dims_467, concat_643_values3_0))[name = string("concat_643")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_30_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = k_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = k_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_30_stride_0, update = linear_233_cast_fp16, x = coreml_update_state_120)[name = string("k_cache1_internal_tensor_assign_30_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_30_cast_fp16, input = k_cache1)[name = string("coreml_update_state_122_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_122 = read_state(input = k_cache1)[name = string("coreml_update_state_122")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_30_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = v_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = v_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_30_stride_0, update = linear_234_cast_fp16, x = coreml_update_state_121)[name = string("v_cache1_internal_tensor_assign_30_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_30_cast_fp16, input = v_cache1)[name = string("coreml_update_state_123_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_123 = read_state(input = v_cache1)[name = string("coreml_update_state_123")];
+            int32 concat_648_values0_0 = const()[name = string("concat_648_values0_0"), val = int32(1)];
+            int32 concat_648_values2_0 = const()[name = string("concat_648_values2_0"), val = int32(1280)];
+            int32 concat_648_axis_0 = const()[name = string("concat_648_axis_0"), val = int32(0)];
+            bool concat_648_interleave_0 = const()[name = string("concat_648_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_648 = concat(axis = concat_648_axis_0, interleave = concat_648_interleave_0, values = (concat_648_values0_0, end_step_61, concat_648_values2_0))[name = string("concat_648")];
+            tensor<int32, [3]> var_6355_begin_0 = const()[name = string("op_6355_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6355_end_mask_0 = const()[name = string("op_6355_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6355_cast_fp16 = slice_by_index(begin = var_6355_begin_0, end = concat_648, end_mask = var_6355_end_mask_0, x = k_cache_117_cast_fp16)[name = string("op_6355_cast_fp16")];
+            tensor<int32, [3]> var_6358_begin_0 = const()[name = string("op_6358_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6358_end_mask_0 = const()[name = string("op_6358_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6358_cast_fp16 = slice_by_index(begin = var_6358_begin_0, end = concat_648, end_mask = var_6358_end_mask_0, x = v_cache_117_cast_fp16)[name = string("op_6358_cast_fp16")];
+            tensor<int32, [4]> concat_650x = const()[name = string("concat_650x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6368_cast_fp16 = reshape(shape = concat_650x, x = linear_232_cast_fp16)[name = string("op_6368_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_235_cast_fp16 = mul(x = var_6368_cast_fp16, y = const_276_to_fp16)[name = string("q_235_cast_fp16")];
+            tensor<int32, [4]> concat_651x = const()[name = string("concat_651x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6375_cast_fp16 = reshape(shape = concat_651x, x = var_6355_cast_fp16)[name = string("op_6375_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_295_cast_fp16 = mul(x = var_6375_cast_fp16, y = const_277_to_fp16)[name = string("k_295_cast_fp16")];
+            tensor<int32, [4]> concat_652x = const()[name = string("concat_652x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6382_cast_fp16 = reshape(shape = concat_652x, x = var_6358_cast_fp16)[name = string("op_6382_cast_fp16")];
+            tensor<int32, [4]> var_6383 = const()[name = string("op_6383"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_175_transpose_x_0 = const()[name = string("qk_175_transpose_x_0"), val = bool(false)];
+            bool qk_175_transpose_y_0 = const()[name = string("qk_175_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_373_perm_0 = const()[name = string("transpose_373_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_374_perm_0 = const()[name = string("transpose_374_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_374 = transpose(perm = transpose_374_perm_0, x = k_295_cast_fp16)[name = string("transpose_406")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_373 = transpose(perm = transpose_373_perm_0, x = q_235_cast_fp16)[name = string("transpose_407")];
+            tensor<fp16, [1, 20, ?, ?]> qk_175_cast_fp16 = matmul(transpose_x = qk_175_transpose_x_0, transpose_y = qk_175_transpose_y_0, x = transpose_373, y = transpose_374)[name = string("qk_175_cast_fp16")];
+            int32 concat_653_values1_0 = const()[name = string("concat_653_values1_0"), val = int32(448)];
+            int32 concat_653_axis_0 = const()[name = string("concat_653_axis_0"), val = int32(0)];
+            bool concat_653_interleave_0 = const()[name = string("concat_653_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_653 = concat(axis = concat_653_axis_0, interleave = concat_653_interleave_0, values = (gather_350_cast_uint16_to_int32, concat_653_values1_0))[name = string("concat_653")];
+            tensor<int32, [2]> var_6386_begin_0 = const()[name = string("op_6386_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6386_end_mask_0 = const()[name = string("op_6386_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_6386_cast_fp16 = slice_by_index(begin = var_6386_begin_0, end = concat_653, end_mask = var_6386_end_mask_0, x = mask_to_fp16)[name = string("op_6386_cast_fp16")];
+            int32 concat_654_values0_0 = const()[name = string("concat_654_values0_0"), val = int32(0)];
+            int32 concat_654_axis_0 = const()[name = string("concat_654_axis_0"), val = int32(0)];
+            bool concat_654_interleave_0 = const()[name = string("concat_654_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_654 = concat(axis = concat_654_axis_0, interleave = concat_654_interleave_0, values = (concat_654_values0_0, gather_350_cast_uint16_to_int32))[name = string("concat_654")];
+            tensor<int32, [2]> var_6387_begin_0 = const()[name = string("op_6387_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6387_end_mask_0 = const()[name = string("op_6387_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_6387_cast_fp16 = slice_by_index(begin = var_6387_begin_0, end = concat_654, end_mask = var_6387_end_mask_0, x = var_6386_cast_fp16)[name = string("op_6387_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_177_cast_fp16 = add(x = qk_175_cast_fp16, y = var_6387_cast_fp16)[name = string("qk_177_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_6390_cast_fp16 = softmax(axis = var_6299, x = qk_177_cast_fp16)[name = string("op_6390_cast_fp16")];
+            bool var_6392_transpose_x_0 = const()[name = string("op_6392_transpose_x_0"), val = bool(false)];
+            bool var_6392_transpose_y_0 = const()[name = string("op_6392_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_295_cast_fp16 = transpose(perm = var_6383, x = var_6382_cast_fp16)[name = string("transpose_408")];
+            tensor<fp16, [1, 20, ?, 64]> var_6392_cast_fp16 = matmul(transpose_x = var_6392_transpose_x_0, transpose_y = var_6392_transpose_y_0, x = var_6390_cast_fp16, y = v_295_cast_fp16)[name = string("op_6392_cast_fp16")];
+            tensor<int32, [4]> var_6393 = const()[name = string("op_6393"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_655x = const()[name = string("concat_655x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6394_cast_fp16 = transpose(perm = var_6393, x = var_6392_cast_fp16)[name = string("transpose_405")];
+            tensor<fp16, [1, ?, 1280]> x_529_cast_fp16 = reshape(shape = concat_655x, x = var_6394_cast_fp16)[name = string("x_529_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6398_to_fp16 = const()[name = string("op_6398_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479614336)))];
+            tensor<fp16, [1280]> var_6399_to_fp16 = const()[name = string("op_6399_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482891200)))];
+            tensor<fp16, [1, ?, 1280]> linear_235_cast_fp16 = linear(bias = var_6399_to_fp16, weight = var_6398_to_fp16, x = x_529_cast_fp16)[name = string("linear_235_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_531_cast_fp16 = add(x = x_525_cast_fp16, y = linear_235_cast_fp16)[name = string("x_531_cast_fp16")];
+            tensor<int32, [1]> var_6406_axes_0 = const()[name = string("op_6406_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482893824)))];
+            tensor<fp16, [1280]> blocks_29_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482896448)))];
+            tensor<fp16, [1, ?, 1280]> var_6406_cast_fp16 = layer_norm(axes = var_6406_axes_0, beta = blocks_29_cross_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_cross_attn_ln_weight_to_fp16, x = x_531_cast_fp16)[name = string("op_6406_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6415_to_fp16 = const()[name = string("op_6415_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482899072)))];
+            tensor<fp16, [1280]> var_6416_to_fp16 = const()[name = string("op_6416_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486175936)))];
+            tensor<fp16, [1, ?, 1280]> linear_236_cast_fp16 = linear(bias = var_6416_to_fp16, weight = var_6415_to_fp16, x = var_6406_cast_fp16)[name = string("linear_236_cast_fp16")];
+            tensor<int32, [3]> concat_656 = const()[name = string("concat_656"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_657 = const()[name = string("concat_657"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_297_internal_tensor_assign_1_stride_0 = const()[name = string("k_297_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_656, begin_mask = k_297_internal_tensor_assign_1_begin_mask_0, end = concat_657, end_mask = k_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_297_internal_tensor_assign_1_squeeze_mask_0, stride = k_297_internal_tensor_assign_1_stride_0, update = k_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("k_297_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_658 = const()[name = string("concat_658"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_659 = const()[name = string("concat_659"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_297_internal_tensor_assign_1_stride_0 = const()[name = string("v_297_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_658, begin_mask = v_297_internal_tensor_assign_1_begin_mask_0, end = concat_659, end_mask = v_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_297_internal_tensor_assign_1_squeeze_mask_0, stride = v_297_internal_tensor_assign_1_stride_0, update = v_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("v_297_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_660x = const()[name = string("concat_660x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6436_cast_fp16 = reshape(shape = concat_660x, x = linear_236_cast_fp16)[name = string("op_6436_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_278_to_fp16 = const()[name = string("const_278_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_239_cast_fp16 = mul(x = var_6436_cast_fp16, y = const_278_to_fp16)[name = string("q_239_cast_fp16")];
+            tensor<int32, [4]> var_6442 = const()[name = string("op_6442"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6443_cast_fp16 = reshape(shape = var_6442, x = k_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6443_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_299_cast_fp16 = mul(x = var_6443_cast_fp16, y = const_279_to_fp16)[name = string("k_299_cast_fp16")];
+            tensor<int32, [4]> var_6449 = const()[name = string("op_6449"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6450_cast_fp16 = reshape(shape = var_6449, x = v_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6450_cast_fp16")];
+            tensor<int32, [4]> var_6451 = const()[name = string("op_6451"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_179_transpose_x_0 = const()[name = string("qk_179_transpose_x_0"), val = bool(false)];
+            bool qk_179_transpose_y_0 = const()[name = string("qk_179_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_375_perm_0 = const()[name = string("transpose_375_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_376_perm_0 = const()[name = string("transpose_376_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_376 = transpose(perm = transpose_376_perm_0, x = k_299_cast_fp16)[name = string("transpose_402")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_375 = transpose(perm = transpose_375_perm_0, x = q_239_cast_fp16)[name = string("transpose_403")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_179_cast_fp16 = matmul(transpose_x = qk_179_transpose_x_0, transpose_y = qk_179_transpose_y_0, x = transpose_375, y = transpose_376)[name = string("qk_179_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6455_cast_fp16 = softmax(axis = var_6299, x = qk_179_cast_fp16)[name = string("op_6455_cast_fp16")];
+            bool var_6457_transpose_x_0 = const()[name = string("op_6457_transpose_x_0"), val = bool(false)];
+            bool var_6457_transpose_y_0 = const()[name = string("op_6457_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_299_cast_fp16 = transpose(perm = var_6451, x = var_6450_cast_fp16)[name = string("transpose_404")];
+            tensor<fp16, [1, 20, ?, 64]> var_6457_cast_fp16 = matmul(transpose_x = var_6457_transpose_x_0, transpose_y = var_6457_transpose_y_0, x = var_6455_cast_fp16, y = v_299_cast_fp16)[name = string("op_6457_cast_fp16")];
+            tensor<int32, [4]> var_6458 = const()[name = string("op_6458"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_661x = const()[name = string("concat_661x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6459_cast_fp16 = transpose(perm = var_6458, x = var_6457_cast_fp16)[name = string("transpose_401")];
+            tensor<fp16, [1, ?, 1280]> x_535_cast_fp16 = reshape(shape = concat_661x, x = var_6459_cast_fp16)[name = string("x_535_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6463_to_fp16 = const()[name = string("op_6463_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486178560)))];
+            tensor<fp16, [1280]> var_6464_to_fp16 = const()[name = string("op_6464_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489455424)))];
+            tensor<fp16, [1, ?, 1280]> linear_237_cast_fp16 = linear(bias = var_6464_to_fp16, weight = var_6463_to_fp16, x = x_535_cast_fp16)[name = string("linear_237_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_537_cast_fp16 = add(x = x_531_cast_fp16, y = linear_237_cast_fp16)[name = string("x_537_cast_fp16")];
+            tensor<int32, [1]> var_6471_axes_0 = const()[name = string("op_6471_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_mlp_ln_weight_to_fp16 = const()[name = string("blocks_29_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489458048)))];
+            tensor<fp16, [1280]> blocks_29_mlp_ln_bias_to_fp16 = const()[name = string("blocks_29_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489460672)))];
+            tensor<fp16, [1, ?, 1280]> var_6471_cast_fp16 = layer_norm(axes = var_6471_axes_0, beta = blocks_29_mlp_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_mlp_ln_weight_to_fp16, x = x_537_cast_fp16)[name = string("op_6471_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6480_to_fp16 = const()[name = string("op_6480_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489463296)))];
+            tensor<fp16, [5120]> var_6481_to_fp16 = const()[name = string("op_6481_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502570560)))];
+            tensor<fp16, [1, ?, 5120]> linear_238_cast_fp16 = linear(bias = var_6481_to_fp16, weight = var_6480_to_fp16, x = var_6471_cast_fp16)[name = string("linear_238_cast_fp16")];
+            string x_541_mode_0 = const()[name = string("x_541_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_541_cast_fp16 = gelu(mode = x_541_mode_0, x = linear_238_cast_fp16)[name = string("x_541_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502580864)))];
+            tensor<fp16, [1280]> var_6487_to_fp16 = const()[name = string("op_6487_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515688128)))];
+            tensor<fp16, [1, ?, 1280]> linear_239_cast_fp16 = linear(bias = var_6487_to_fp16, weight = var_6486_to_fp16, x = x_541_cast_fp16)[name = string("linear_239_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_543_cast_fp16 = add(x = x_537_cast_fp16, y = linear_239_cast_fp16)[name = string("x_543_cast_fp16")];
+            tensor<int32, [4]> k_cache_121_begin_0 = const()[name = string("k_cache_121_begin_0"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_121_end_0 = const()[name = string("k_cache_121_end_0"), val = tensor<int32, [4]>([31, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_121_end_mask_0 = const()[name = string("k_cache_121_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_121_squeeze_mask_0 = const()[name = string("k_cache_121_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_121_cast_fp16 = slice_by_index(begin = k_cache_121_begin_0, end = k_cache_121_end_0, end_mask = k_cache_121_end_mask_0, squeeze_mask = k_cache_121_squeeze_mask_0, x = coreml_update_state_122)[name = string("k_cache_121_cast_fp16")];
+            tensor<int32, [4]> v_cache_121_begin_0 = const()[name = string("v_cache_121_begin_0"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_121_end_0 = const()[name = string("v_cache_121_end_0"), val = tensor<int32, [4]>([31, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_121_end_mask_0 = const()[name = string("v_cache_121_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_121_squeeze_mask_0 = const()[name = string("v_cache_121_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_121_cast_fp16 = slice_by_index(begin = v_cache_121_begin_0, end = v_cache_121_end_0, end_mask = v_cache_121_end_mask_0, squeeze_mask = v_cache_121_squeeze_mask_0, x = coreml_update_state_123)[name = string("v_cache_121_cast_fp16")];
+            tensor<int32, [4]> k_cache_123_begin_0 = const()[name = string("k_cache_123_begin_0"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_123_end_0 = const()[name = string("k_cache_123_end_0"), val = tensor<int32, [4]>([31, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_123_end_mask_0 = const()[name = string("k_cache_123_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_123_squeeze_mask_0 = const()[name = string("k_cache_123_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_123_cast_fp16 = slice_by_index(begin = k_cache_123_begin_0, end = k_cache_123_end_0, end_mask = k_cache_123_end_mask_0, squeeze_mask = k_cache_123_squeeze_mask_0, x = read_state_2)[name = string("k_cache_123_cast_fp16")];
+            tensor<int32, [4]> v_cache_123_begin_0 = const()[name = string("v_cache_123_begin_0"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_123_end_0 = const()[name = string("v_cache_123_end_0"), val = tensor<int32, [4]>([31, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_123_end_mask_0 = const()[name = string("v_cache_123_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_123_squeeze_mask_0 = const()[name = string("v_cache_123_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_123_cast_fp16 = slice_by_index(begin = v_cache_123_begin_0, end = v_cache_123_end_0, end_mask = v_cache_123_end_mask_0, squeeze_mask = v_cache_123_squeeze_mask_0, x = read_state_3)[name = string("v_cache_123_cast_fp16")];
+            int32 var_6510 = const()[name = string("op_6510"), val = int32(-1)];
+            tensor<int32, [1]> var_6528_axes_0 = const()[name = string("op_6528_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515690752)))];
+            tensor<fp16, [1280]> blocks_30_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515693376)))];
+            fp16 var_6516_to_fp16 = const()[name = string("op_6516_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6528_cast_fp16 = layer_norm(axes = var_6528_axes_0, beta = blocks_30_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_attn_ln_weight_to_fp16, x = x_543_cast_fp16)[name = string("op_6528_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6539_to_fp16 = const()[name = string("op_6539_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515696000)))];
+            tensor<fp16, [1280]> var_6540_to_fp16 = const()[name = string("op_6540_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518972864)))];
+            tensor<fp16, [1, ?, 1280]> linear_240_cast_fp16 = linear(bias = var_6540_to_fp16, weight = var_6539_to_fp16, x = var_6528_cast_fp16)[name = string("linear_240_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6543_to_fp16 = const()[name = string("op_6543_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518975488)))];
+            tensor<fp16, [1, ?, 1280]> linear_241_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6543_to_fp16, x = var_6528_cast_fp16)[name = string("linear_241_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6547_to_fp16 = const()[name = string("op_6547_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1522252352)))];
+            tensor<fp16, [1280]> var_6548_to_fp16 = const()[name = string("op_6548_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525529216)))];
+            tensor<fp16, [1, ?, 1280]> linear_242_cast_fp16 = linear(bias = var_6548_to_fp16, weight = var_6547_to_fp16, x = var_6528_cast_fp16)[name = string("linear_242_cast_fp16")];
+            tensor<int32, [3]> var_6550_shape_cast_fp16 = shape(x = linear_240_cast_fp16)[name = string("op_6550_shape_cast_fp16")];
+            int32 gather_362_axis_0 = const()[name = string("gather_362_axis_0"), val = int32(0)];
+            int32 gather_362_batch_dims_0 = const()[name = string("gather_362_batch_dims_0"), val = int32(0)];
+            bool gather_362_validate_indices_0 = const()[name = string("gather_362_validate_indices_0"), val = bool(false)];
+            string var_6550_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6550_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_362_to_uint16 = const()[name = string("select_362_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_6550_shape_cast_fp16_to_uint16 = cast(dtype = var_6550_shape_cast_fp16_to_uint16_dtype_0, x = var_6550_shape_cast_fp16)[name = string("cast_330")];
+            uint16 gather_362_cast_uint16 = gather(axis = gather_362_axis_0, batch_dims = gather_362_batch_dims_0, indices = select_362_to_uint16, validate_indices = gather_362_validate_indices_0, x = var_6550_shape_cast_fp16_to_uint16)[name = string("gather_362_cast_uint16")];
+            string gather_362_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_362_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_362_cast_uint16_to_int32 = cast(dtype = gather_362_cast_uint16_to_int32_dtype_0, x = gather_362_cast_uint16)[name = string("cast_329")];
+            int32 end_step_63 = add(x = offset, y = gather_362_cast_uint16_to_int32)[name = string("end_step_63")];
+            tensor<int32, [1]> expand_dims_480 = const()[name = string("expand_dims_480"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_482 = const()[name = string("expand_dims_482"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_483_axes_0 = const()[name = string("expand_dims_483_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_483 = expand_dims(axes = expand_dims_483_axes_0, x = end_step_63)[name = string("expand_dims_483")];
+            tensor<int32, [1]> concat_664_values0_0 = const()[name = string("concat_664_values0_0"), val = tensor<int32, [1]>([30])];
+            int32 concat_664_axis_0 = const()[name = string("concat_664_axis_0"), val = int32(0)];
+            bool concat_664_interleave_0 = const()[name = string("concat_664_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_664 = concat(axis = concat_664_axis_0, interleave = concat_664_interleave_0, values = (concat_664_values0_0, expand_dims_480, expand_dims_1, expand_dims_482))[name = string("concat_664")];
+            tensor<int32, [1]> concat_665_values0_0 = const()[name = string("concat_665_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_665_values1_0 = const()[name = string("concat_665_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_665_values3_0 = const()[name = string("concat_665_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_665_axis_0 = const()[name = string("concat_665_axis_0"), val = int32(0)];
+            bool concat_665_interleave_0 = const()[name = string("concat_665_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_665 = concat(axis = concat_665_axis_0, interleave = concat_665_interleave_0, values = (concat_665_values0_0, concat_665_values1_0, expand_dims_483, concat_665_values3_0))[name = string("concat_665")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_31_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = k_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = k_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_31_stride_0, update = linear_241_cast_fp16, x = coreml_update_state_122)[name = string("k_cache1_internal_tensor_assign_31_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_31_cast_fp16, input = k_cache1)[name = string("coreml_update_state_124_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_124 = read_state(input = k_cache1)[name = string("coreml_update_state_124")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_31_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = v_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = v_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_31_stride_0, update = linear_242_cast_fp16, x = coreml_update_state_123)[name = string("v_cache1_internal_tensor_assign_31_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_31_cast_fp16, input = v_cache1)[name = string("coreml_update_state_125_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_125 = read_state(input = v_cache1)[name = string("coreml_update_state_125")];
+            int32 concat_670_values0_0 = const()[name = string("concat_670_values0_0"), val = int32(1)];
+            int32 concat_670_values2_0 = const()[name = string("concat_670_values2_0"), val = int32(1280)];
+            int32 concat_670_axis_0 = const()[name = string("concat_670_axis_0"), val = int32(0)];
+            bool concat_670_interleave_0 = const()[name = string("concat_670_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_670 = concat(axis = concat_670_axis_0, interleave = concat_670_interleave_0, values = (concat_670_values0_0, end_step_63, concat_670_values2_0))[name = string("concat_670")];
+            tensor<int32, [3]> var_6566_begin_0 = const()[name = string("op_6566_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6566_end_mask_0 = const()[name = string("op_6566_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6566_cast_fp16 = slice_by_index(begin = var_6566_begin_0, end = concat_670, end_mask = var_6566_end_mask_0, x = k_cache_121_cast_fp16)[name = string("op_6566_cast_fp16")];
+            tensor<int32, [3]> var_6569_begin_0 = const()[name = string("op_6569_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6569_end_mask_0 = const()[name = string("op_6569_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6569_cast_fp16 = slice_by_index(begin = var_6569_begin_0, end = concat_670, end_mask = var_6569_end_mask_0, x = v_cache_121_cast_fp16)[name = string("op_6569_cast_fp16")];
+            tensor<int32, [4]> concat_672x = const()[name = string("concat_672x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6579_cast_fp16 = reshape(shape = concat_672x, x = linear_240_cast_fp16)[name = string("op_6579_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_243_cast_fp16 = mul(x = var_6579_cast_fp16, y = const_280_to_fp16)[name = string("q_243_cast_fp16")];
+            tensor<int32, [4]> concat_673x = const()[name = string("concat_673x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6586_cast_fp16 = reshape(shape = concat_673x, x = var_6566_cast_fp16)[name = string("op_6586_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_305_cast_fp16 = mul(x = var_6586_cast_fp16, y = const_281_to_fp16)[name = string("k_305_cast_fp16")];
+            tensor<int32, [4]> concat_674x = const()[name = string("concat_674x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6593_cast_fp16 = reshape(shape = concat_674x, x = var_6569_cast_fp16)[name = string("op_6593_cast_fp16")];
+            tensor<int32, [4]> var_6594 = const()[name = string("op_6594"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_181_transpose_x_0 = const()[name = string("qk_181_transpose_x_0"), val = bool(false)];
+            bool qk_181_transpose_y_0 = const()[name = string("qk_181_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_377_perm_0 = const()[name = string("transpose_377_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_378_perm_0 = const()[name = string("transpose_378_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_378 = transpose(perm = transpose_378_perm_0, x = k_305_cast_fp16)[name = string("transpose_398")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_377 = transpose(perm = transpose_377_perm_0, x = q_243_cast_fp16)[name = string("transpose_399")];
+            tensor<fp16, [1, 20, ?, ?]> qk_181_cast_fp16 = matmul(transpose_x = qk_181_transpose_x_0, transpose_y = qk_181_transpose_y_0, x = transpose_377, y = transpose_378)[name = string("qk_181_cast_fp16")];
+            int32 concat_675_values1_0 = const()[name = string("concat_675_values1_0"), val = int32(448)];
+            int32 concat_675_axis_0 = const()[name = string("concat_675_axis_0"), val = int32(0)];
+            bool concat_675_interleave_0 = const()[name = string("concat_675_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_675 = concat(axis = concat_675_axis_0, interleave = concat_675_interleave_0, values = (gather_362_cast_uint16_to_int32, concat_675_values1_0))[name = string("concat_675")];
+            tensor<int32, [2]> var_6597_begin_0 = const()[name = string("op_6597_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6597_end_mask_0 = const()[name = string("op_6597_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_6597_cast_fp16 = slice_by_index(begin = var_6597_begin_0, end = concat_675, end_mask = var_6597_end_mask_0, x = mask_to_fp16)[name = string("op_6597_cast_fp16")];
+            int32 concat_676_values0_0 = const()[name = string("concat_676_values0_0"), val = int32(0)];
+            int32 concat_676_axis_0 = const()[name = string("concat_676_axis_0"), val = int32(0)];
+            bool concat_676_interleave_0 = const()[name = string("concat_676_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_676 = concat(axis = concat_676_axis_0, interleave = concat_676_interleave_0, values = (concat_676_values0_0, gather_362_cast_uint16_to_int32))[name = string("concat_676")];
+            tensor<int32, [2]> var_6598_begin_0 = const()[name = string("op_6598_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6598_end_mask_0 = const()[name = string("op_6598_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_6598_cast_fp16 = slice_by_index(begin = var_6598_begin_0, end = concat_676, end_mask = var_6598_end_mask_0, x = var_6597_cast_fp16)[name = string("op_6598_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_183_cast_fp16 = add(x = qk_181_cast_fp16, y = var_6598_cast_fp16)[name = string("qk_183_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_6601_cast_fp16 = softmax(axis = var_6510, x = qk_183_cast_fp16)[name = string("op_6601_cast_fp16")];
+            bool var_6603_transpose_x_0 = const()[name = string("op_6603_transpose_x_0"), val = bool(false)];
+            bool var_6603_transpose_y_0 = const()[name = string("op_6603_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_305_cast_fp16 = transpose(perm = var_6594, x = var_6593_cast_fp16)[name = string("transpose_400")];
+            tensor<fp16, [1, 20, ?, 64]> var_6603_cast_fp16 = matmul(transpose_x = var_6603_transpose_x_0, transpose_y = var_6603_transpose_y_0, x = var_6601_cast_fp16, y = v_305_cast_fp16)[name = string("op_6603_cast_fp16")];
+            tensor<int32, [4]> var_6604 = const()[name = string("op_6604"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_677x = const()[name = string("concat_677x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6605_cast_fp16 = transpose(perm = var_6604, x = var_6603_cast_fp16)[name = string("transpose_397")];
+            tensor<fp16, [1, ?, 1280]> x_547_cast_fp16 = reshape(shape = concat_677x, x = var_6605_cast_fp16)[name = string("x_547_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6609_to_fp16 = const()[name = string("op_6609_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525531840)))];
+            tensor<fp16, [1280]> var_6610_to_fp16 = const()[name = string("op_6610_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528808704)))];
+            tensor<fp16, [1, ?, 1280]> linear_243_cast_fp16 = linear(bias = var_6610_to_fp16, weight = var_6609_to_fp16, x = x_547_cast_fp16)[name = string("linear_243_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_549_cast_fp16 = add(x = x_543_cast_fp16, y = linear_243_cast_fp16)[name = string("x_549_cast_fp16")];
+            tensor<int32, [1]> var_6617_axes_0 = const()[name = string("op_6617_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528811328)))];
+            tensor<fp16, [1280]> blocks_30_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528813952)))];
+            tensor<fp16, [1, ?, 1280]> var_6617_cast_fp16 = layer_norm(axes = var_6617_axes_0, beta = blocks_30_cross_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_cross_attn_ln_weight_to_fp16, x = x_549_cast_fp16)[name = string("op_6617_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6626_to_fp16 = const()[name = string("op_6626_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528816576)))];
+            tensor<fp16, [1280]> var_6627_to_fp16 = const()[name = string("op_6627_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532093440)))];
+            tensor<fp16, [1, ?, 1280]> linear_244_cast_fp16 = linear(bias = var_6627_to_fp16, weight = var_6626_to_fp16, x = var_6617_cast_fp16)[name = string("linear_244_cast_fp16")];
+            tensor<int32, [3]> concat_678 = const()[name = string("concat_678"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_679 = const()[name = string("concat_679"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_307_internal_tensor_assign_1_stride_0 = const()[name = string("k_307_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_678, begin_mask = k_307_internal_tensor_assign_1_begin_mask_0, end = concat_679, end_mask = k_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_307_internal_tensor_assign_1_squeeze_mask_0, stride = k_307_internal_tensor_assign_1_stride_0, update = k_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("k_307_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_680 = const()[name = string("concat_680"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_681 = const()[name = string("concat_681"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_307_internal_tensor_assign_1_stride_0 = const()[name = string("v_307_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_680, begin_mask = v_307_internal_tensor_assign_1_begin_mask_0, end = concat_681, end_mask = v_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_307_internal_tensor_assign_1_squeeze_mask_0, stride = v_307_internal_tensor_assign_1_stride_0, update = v_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("v_307_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_682x = const()[name = string("concat_682x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6647_cast_fp16 = reshape(shape = concat_682x, x = linear_244_cast_fp16)[name = string("op_6647_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_247_cast_fp16 = mul(x = var_6647_cast_fp16, y = const_282_to_fp16)[name = string("q_247_cast_fp16")];
+            tensor<int32, [4]> var_6653 = const()[name = string("op_6653"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6654_cast_fp16 = reshape(shape = var_6653, x = k_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6654_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_309_cast_fp16 = mul(x = var_6654_cast_fp16, y = const_283_to_fp16)[name = string("k_309_cast_fp16")];
+            tensor<int32, [4]> var_6660 = const()[name = string("op_6660"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6661_cast_fp16 = reshape(shape = var_6660, x = v_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6661_cast_fp16")];
+            tensor<int32, [4]> var_6662 = const()[name = string("op_6662"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_185_transpose_x_0 = const()[name = string("qk_185_transpose_x_0"), val = bool(false)];
+            bool qk_185_transpose_y_0 = const()[name = string("qk_185_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_379_perm_0 = const()[name = string("transpose_379_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_380_perm_0 = const()[name = string("transpose_380_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_380 = transpose(perm = transpose_380_perm_0, x = k_309_cast_fp16)[name = string("transpose_394")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_379 = transpose(perm = transpose_379_perm_0, x = q_247_cast_fp16)[name = string("transpose_395")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_185_cast_fp16 = matmul(transpose_x = qk_185_transpose_x_0, transpose_y = qk_185_transpose_y_0, x = transpose_379, y = transpose_380)[name = string("qk_185_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6666_cast_fp16 = softmax(axis = var_6510, x = qk_185_cast_fp16)[name = string("op_6666_cast_fp16")];
+            bool var_6668_transpose_x_0 = const()[name = string("op_6668_transpose_x_0"), val = bool(false)];
+            bool var_6668_transpose_y_0 = const()[name = string("op_6668_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_309_cast_fp16 = transpose(perm = var_6662, x = var_6661_cast_fp16)[name = string("transpose_396")];
+            tensor<fp16, [1, 20, ?, 64]> var_6668_cast_fp16 = matmul(transpose_x = var_6668_transpose_x_0, transpose_y = var_6668_transpose_y_0, x = var_6666_cast_fp16, y = v_309_cast_fp16)[name = string("op_6668_cast_fp16")];
+            tensor<int32, [4]> var_6669 = const()[name = string("op_6669"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_683x = const()[name = string("concat_683x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6670_cast_fp16 = transpose(perm = var_6669, x = var_6668_cast_fp16)[name = string("transpose_393")];
+            tensor<fp16, [1, ?, 1280]> x_553_cast_fp16 = reshape(shape = concat_683x, x = var_6670_cast_fp16)[name = string("x_553_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6674_to_fp16 = const()[name = string("op_6674_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532096064)))];
+            tensor<fp16, [1280]> var_6675_to_fp16 = const()[name = string("op_6675_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535372928)))];
+            tensor<fp16, [1, ?, 1280]> linear_245_cast_fp16 = linear(bias = var_6675_to_fp16, weight = var_6674_to_fp16, x = x_553_cast_fp16)[name = string("linear_245_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_555_cast_fp16 = add(x = x_549_cast_fp16, y = linear_245_cast_fp16)[name = string("x_555_cast_fp16")];
+            tensor<int32, [1]> var_6682_axes_0 = const()[name = string("op_6682_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_mlp_ln_weight_to_fp16 = const()[name = string("blocks_30_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535375552)))];
+            tensor<fp16, [1280]> blocks_30_mlp_ln_bias_to_fp16 = const()[name = string("blocks_30_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535378176)))];
+            tensor<fp16, [1, ?, 1280]> var_6682_cast_fp16 = layer_norm(axes = var_6682_axes_0, beta = blocks_30_mlp_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_mlp_ln_weight_to_fp16, x = x_555_cast_fp16)[name = string("op_6682_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6691_to_fp16 = const()[name = string("op_6691_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535380800)))];
+            tensor<fp16, [5120]> var_6692_to_fp16 = const()[name = string("op_6692_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548488064)))];
+            tensor<fp16, [1, ?, 5120]> linear_246_cast_fp16 = linear(bias = var_6692_to_fp16, weight = var_6691_to_fp16, x = var_6682_cast_fp16)[name = string("linear_246_cast_fp16")];
+            string x_559_mode_0 = const()[name = string("x_559_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_559_cast_fp16 = gelu(mode = x_559_mode_0, x = linear_246_cast_fp16)[name = string("x_559_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6697_to_fp16 = const()[name = string("op_6697_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548498368)))];
+            tensor<fp16, [1280]> var_6698_to_fp16 = const()[name = string("op_6698_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561605632)))];
+            tensor<fp16, [1, ?, 1280]> linear_247_cast_fp16 = linear(bias = var_6698_to_fp16, weight = var_6697_to_fp16, x = x_559_cast_fp16)[name = string("linear_247_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_561_cast_fp16 = add(x = x_555_cast_fp16, y = linear_247_cast_fp16)[name = string("x_561_cast_fp16")];
+            tensor<int32, [4]> k_cache_125_begin_0 = const()[name = string("k_cache_125_begin_0"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_125_end_0 = const()[name = string("k_cache_125_end_0"), val = tensor<int32, [4]>([32, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_125_end_mask_0 = const()[name = string("k_cache_125_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_125_squeeze_mask_0 = const()[name = string("k_cache_125_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_125_cast_fp16 = slice_by_index(begin = k_cache_125_begin_0, end = k_cache_125_end_0, end_mask = k_cache_125_end_mask_0, squeeze_mask = k_cache_125_squeeze_mask_0, x = coreml_update_state_124)[name = string("k_cache_125_cast_fp16")];
+            tensor<int32, [4]> v_cache_125_begin_0 = const()[name = string("v_cache_125_begin_0"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_125_end_0 = const()[name = string("v_cache_125_end_0"), val = tensor<int32, [4]>([32, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_125_end_mask_0 = const()[name = string("v_cache_125_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_125_squeeze_mask_0 = const()[name = string("v_cache_125_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_125_cast_fp16 = slice_by_index(begin = v_cache_125_begin_0, end = v_cache_125_end_0, end_mask = v_cache_125_end_mask_0, squeeze_mask = v_cache_125_squeeze_mask_0, x = coreml_update_state_125)[name = string("v_cache_125_cast_fp16")];
+            tensor<int32, [4]> k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor<int32, [4]>([32, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")];
+            tensor<int32, [4]> v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor<int32, [4]>([32, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")];
+            int32 var_6721 = const()[name = string("op_6721"), val = int32(-1)];
+            tensor<int32, [1]> var_6739_axes_0 = const()[name = string("op_6739_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561608256)))];
+            tensor<fp16, [1280]> blocks_31_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561610880)))];
+            fp16 var_6727_to_fp16 = const()[name = string("op_6727_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6739_cast_fp16 = layer_norm(axes = var_6739_axes_0, beta = blocks_31_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_attn_ln_weight_to_fp16, x = x_561_cast_fp16)[name = string("op_6739_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6750_to_fp16 = const()[name = string("op_6750_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561613504)))];
+            tensor<fp16, [1280]> var_6751_to_fp16 = const()[name = string("op_6751_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564890368)))];
+            tensor<fp16, [1, ?, 1280]> linear_248_cast_fp16 = linear(bias = var_6751_to_fp16, weight = var_6750_to_fp16, x = var_6739_cast_fp16)[name = string("linear_248_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6754_to_fp16 = const()[name = string("op_6754_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564892992)))];
+            tensor<fp16, [1, ?, 1280]> linear_249_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6754_to_fp16, x = var_6739_cast_fp16)[name = string("linear_249_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6758_to_fp16 = const()[name = string("op_6758_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1568169856)))];
+            tensor<fp16, [1280]> var_6759_to_fp16 = const()[name = string("op_6759_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571446720)))];
+            tensor<fp16, [1, ?, 1280]> linear_250_cast_fp16 = linear(bias = var_6759_to_fp16, weight = var_6758_to_fp16, x = var_6739_cast_fp16)[name = string("linear_250_cast_fp16")];
+            tensor<int32, [3]> var_6761_shape_cast_fp16 = shape(x = linear_248_cast_fp16)[name = string("op_6761_shape_cast_fp16")];
+            int32 gather_374_axis_0 = const()[name = string("gather_374_axis_0"), val = int32(0)];
+            int32 gather_374_batch_dims_0 = const()[name = string("gather_374_batch_dims_0"), val = int32(0)];
+            bool gather_374_validate_indices_0 = const()[name = string("gather_374_validate_indices_0"), val = bool(false)];
+            string var_6761_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6761_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_374_to_uint16 = const()[name = string("select_374_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_6761_shape_cast_fp16_to_uint16 = cast(dtype = var_6761_shape_cast_fp16_to_uint16_dtype_0, x = var_6761_shape_cast_fp16)[name = string("cast_328")];
+            uint16 gather_374_cast_uint16 = gather(axis = gather_374_axis_0, batch_dims = gather_374_batch_dims_0, indices = select_374_to_uint16, validate_indices = gather_374_validate_indices_0, x = var_6761_shape_cast_fp16_to_uint16)[name = string("gather_374_cast_uint16")];
+            string gather_374_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_374_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_374_cast_uint16_to_int32 = cast(dtype = gather_374_cast_uint16_to_int32_dtype_0, x = gather_374_cast_uint16)[name = string("cast_327")];
+            int32 end_step = add(x = offset, y = gather_374_cast_uint16_to_int32)[name = string("end_step")];
+            tensor<int32, [1]> expand_dims_496 = const()[name = string("expand_dims_496"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_498 = const()[name = string("expand_dims_498"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_499_axes_0 = const()[name = string("expand_dims_499_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_499 = expand_dims(axes = expand_dims_499_axes_0, x = end_step)[name = string("expand_dims_499")];
+            tensor<int32, [1]> concat_686_values0_0 = const()[name = string("concat_686_values0_0"), val = tensor<int32, [1]>([31])];
+            int32 concat_686_axis_0 = const()[name = string("concat_686_axis_0"), val = int32(0)];
+            bool concat_686_interleave_0 = const()[name = string("concat_686_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_686 = concat(axis = concat_686_axis_0, interleave = concat_686_interleave_0, values = (concat_686_values0_0, expand_dims_496, expand_dims_1, expand_dims_498))[name = string("concat_686")];
+            tensor<int32, [1]> concat_687_values0_0 = const()[name = string("concat_687_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_687_values1_0 = const()[name = string("concat_687_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_687_values3_0 = const()[name = string("concat_687_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_687_axis_0 = const()[name = string("concat_687_axis_0"), val = int32(0)];
+            bool concat_687_interleave_0 = const()[name = string("concat_687_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_687 = concat(axis = concat_687_axis_0, interleave = concat_687_interleave_0, values = (concat_687_values0_0, concat_687_values1_0, expand_dims_499, concat_687_values3_0))[name = string("concat_687")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_32_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = k_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = k_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_32_stride_0, update = linear_249_cast_fp16, x = coreml_update_state_124)[name = string("k_cache1_internal_tensor_assign_32_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_32_cast_fp16, input = k_cache1)[name = string("coreml_update_state_126_write_state")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_32_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = v_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = v_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_32_stride_0, update = linear_250_cast_fp16, x = coreml_update_state_125)[name = string("v_cache1_internal_tensor_assign_32_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_32_cast_fp16, input = v_cache1)[name = string("coreml_update_state_127_write_state")];
+            int32 concat_692_values0_0 = const()[name = string("concat_692_values0_0"), val = int32(1)];
+            int32 concat_692_values2_0 = const()[name = string("concat_692_values2_0"), val = int32(1280)];
+            int32 concat_692_axis_0 = const()[name = string("concat_692_axis_0"), val = int32(0)];
+            bool concat_692_interleave_0 = const()[name = string("concat_692_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_692 = concat(axis = concat_692_axis_0, interleave = concat_692_interleave_0, values = (concat_692_values0_0, end_step, concat_692_values2_0))[name = string("concat_692")];
+            tensor<int32, [3]> var_6777_begin_0 = const()[name = string("op_6777_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6777_end_mask_0 = const()[name = string("op_6777_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6777_cast_fp16 = slice_by_index(begin = var_6777_begin_0, end = concat_692, end_mask = var_6777_end_mask_0, x = k_cache_125_cast_fp16)[name = string("op_6777_cast_fp16")];
+            tensor<int32, [3]> var_6780_begin_0 = const()[name = string("op_6780_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6780_end_mask_0 = const()[name = string("op_6780_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6780_cast_fp16 = slice_by_index(begin = var_6780_begin_0, end = concat_692, end_mask = var_6780_end_mask_0, x = v_cache_125_cast_fp16)[name = string("op_6780_cast_fp16")];
+            tensor<int32, [4]> concat_694x = const()[name = string("concat_694x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6790_cast_fp16 = reshape(shape = concat_694x, x = linear_248_cast_fp16)[name = string("op_6790_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_284_to_fp16 = const()[name = string("const_284_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_251_cast_fp16 = mul(x = var_6790_cast_fp16, y = const_284_to_fp16)[name = string("q_251_cast_fp16")];
+            tensor<int32, [4]> concat_695x = const()[name = string("concat_695x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6797_cast_fp16 = reshape(shape = concat_695x, x = var_6777_cast_fp16)[name = string("op_6797_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_315_cast_fp16 = mul(x = var_6797_cast_fp16, y = const_285_to_fp16)[name = string("k_315_cast_fp16")];
+            tensor<int32, [4]> concat_696x = const()[name = string("concat_696x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6804_cast_fp16 = reshape(shape = concat_696x, x = var_6780_cast_fp16)[name = string("op_6804_cast_fp16")];
+            tensor<int32, [4]> var_6805 = const()[name = string("op_6805"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_187_transpose_x_0 = const()[name = string("qk_187_transpose_x_0"), val = bool(false)];
+            bool qk_187_transpose_y_0 = const()[name = string("qk_187_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_381_perm_0 = const()[name = string("transpose_381_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_382_perm_0 = const()[name = string("transpose_382_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_382 = transpose(perm = transpose_382_perm_0, x = k_315_cast_fp16)[name = string("transpose_390")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_381 = transpose(perm = transpose_381_perm_0, x = q_251_cast_fp16)[name = string("transpose_391")];
+            tensor<fp16, [1, 20, ?, ?]> qk_187_cast_fp16 = matmul(transpose_x = qk_187_transpose_x_0, transpose_y = qk_187_transpose_y_0, x = transpose_381, y = transpose_382)[name = string("qk_187_cast_fp16")];
+            int32 concat_697_values1_0 = const()[name = string("concat_697_values1_0"), val = int32(448)];
+            int32 concat_697_axis_0 = const()[name = string("concat_697_axis_0"), val = int32(0)];
+            bool concat_697_interleave_0 = const()[name = string("concat_697_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_697 = concat(axis = concat_697_axis_0, interleave = concat_697_interleave_0, values = (gather_374_cast_uint16_to_int32, concat_697_values1_0))[name = string("concat_697")];
+            tensor<int32, [2]> var_6808_begin_0 = const()[name = string("op_6808_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6808_end_mask_0 = const()[name = string("op_6808_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_6808_cast_fp16 = slice_by_index(begin = var_6808_begin_0, end = concat_697, end_mask = var_6808_end_mask_0, x = mask_to_fp16)[name = string("op_6808_cast_fp16")];
+            int32 concat_698_values0_0 = const()[name = string("concat_698_values0_0"), val = int32(0)];
+            int32 concat_698_axis_0 = const()[name = string("concat_698_axis_0"), val = int32(0)];
+            bool concat_698_interleave_0 = const()[name = string("concat_698_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_698 = concat(axis = concat_698_axis_0, interleave = concat_698_interleave_0, values = (concat_698_values0_0, gather_374_cast_uint16_to_int32))[name = string("concat_698")];
+            tensor<int32, [2]> var_6809_begin_0 = const()[name = string("op_6809_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6809_end_mask_0 = const()[name = string("op_6809_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_6809_cast_fp16 = slice_by_index(begin = var_6809_begin_0, end = concat_698, end_mask = var_6809_end_mask_0, x = var_6808_cast_fp16)[name = string("op_6809_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_189_cast_fp16 = add(x = qk_187_cast_fp16, y = var_6809_cast_fp16)[name = string("qk_189_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_6812_cast_fp16 = softmax(axis = var_6721, x = qk_189_cast_fp16)[name = string("op_6812_cast_fp16")];
+            bool var_6814_transpose_x_0 = const()[name = string("op_6814_transpose_x_0"), val = bool(false)];
+            bool var_6814_transpose_y_0 = const()[name = string("op_6814_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_315_cast_fp16 = transpose(perm = var_6805, x = var_6804_cast_fp16)[name = string("transpose_392")];
+            tensor<fp16, [1, 20, ?, 64]> var_6814_cast_fp16 = matmul(transpose_x = var_6814_transpose_x_0, transpose_y = var_6814_transpose_y_0, x = var_6812_cast_fp16, y = v_315_cast_fp16)[name = string("op_6814_cast_fp16")];
+            tensor<int32, [4]> var_6815 = const()[name = string("op_6815"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_699x = const()[name = string("concat_699x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6816_cast_fp16 = transpose(perm = var_6815, x = var_6814_cast_fp16)[name = string("transpose_389")];
+            tensor<fp16, [1, ?, 1280]> x_565_cast_fp16 = reshape(shape = concat_699x, x = var_6816_cast_fp16)[name = string("x_565_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6820_to_fp16 = const()[name = string("op_6820_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571449344)))];
+            tensor<fp16, [1280]> var_6821_to_fp16 = const()[name = string("op_6821_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574726208)))];
+            tensor<fp16, [1, ?, 1280]> linear_251_cast_fp16 = linear(bias = var_6821_to_fp16, weight = var_6820_to_fp16, x = x_565_cast_fp16)[name = string("linear_251_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_567_cast_fp16 = add(x = x_561_cast_fp16, y = linear_251_cast_fp16)[name = string("x_567_cast_fp16")];
+            tensor<int32, [1]> var_6828_axes_0 = const()[name = string("op_6828_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574728832)))];
+            tensor<fp16, [1280]> blocks_31_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574731456)))];
+            tensor<fp16, [1, ?, 1280]> var_6828_cast_fp16 = layer_norm(axes = var_6828_axes_0, beta = blocks_31_cross_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_cross_attn_ln_weight_to_fp16, x = x_567_cast_fp16)[name = string("op_6828_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6837_to_fp16 = const()[name = string("op_6837_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574734080)))];
+            tensor<fp16, [1280]> var_6838_to_fp16 = const()[name = string("op_6838_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578010944)))];
+            tensor<fp16, [1, ?, 1280]> linear_252_cast_fp16 = linear(bias = var_6838_to_fp16, weight = var_6837_to_fp16, x = var_6828_cast_fp16)[name = string("linear_252_cast_fp16")];
+            tensor<int32, [3]> concat_700 = const()[name = string("concat_700"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_701 = const()[name = string("concat_701"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_317_internal_tensor_assign_1_stride_0 = const()[name = string("k_317_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_700, begin_mask = k_317_internal_tensor_assign_1_begin_mask_0, end = concat_701, end_mask = k_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_317_internal_tensor_assign_1_squeeze_mask_0, stride = k_317_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_317_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_702 = const()[name = string("concat_702"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_703 = const()[name = string("concat_703"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_317_internal_tensor_assign_1_stride_0 = const()[name = string("v_317_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_702, begin_mask = v_317_internal_tensor_assign_1_begin_mask_0, end = concat_703, end_mask = v_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_317_internal_tensor_assign_1_squeeze_mask_0, stride = v_317_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_317_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_704x = const()[name = string("concat_704x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6858_cast_fp16 = reshape(shape = concat_704x, x = linear_252_cast_fp16)[name = string("op_6858_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_286_to_fp16 = const()[name = string("const_286_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_cast_fp16 = mul(x = var_6858_cast_fp16, y = const_286_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_6864 = const()[name = string("op_6864"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6865_cast_fp16 = reshape(shape = var_6864, x = k_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6865_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_cast_fp16 = mul(x = var_6865_cast_fp16, y = const_287_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_6871 = const()[name = string("op_6871"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6872_cast_fp16 = reshape(shape = var_6871, x = v_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6872_cast_fp16")];
+            tensor<int32, [4]> var_6873 = const()[name = string("op_6873"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_383_perm_0 = const()[name = string("transpose_383_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_384_perm_0 = const()[name = string("transpose_384_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_384 = transpose(perm = transpose_384_perm_0, x = k_cast_fp16)[name = string("transpose_386")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_383 = transpose(perm = transpose_383_perm_0, x = q_cast_fp16)[name = string("transpose_387")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_383, y = transpose_384)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6877_cast_fp16 = softmax(axis = var_6721, x = qk_cast_fp16)[name = string("op_6877_cast_fp16")];
+            bool var_6879_transpose_x_0 = const()[name = string("op_6879_transpose_x_0"), val = bool(false)];
+            bool var_6879_transpose_y_0 = const()[name = string("op_6879_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_cast_fp16 = transpose(perm = var_6873, x = var_6872_cast_fp16)[name = string("transpose_388")];
+            tensor<fp16, [1, 20, ?, 64]> var_6879_cast_fp16 = matmul(transpose_x = var_6879_transpose_x_0, transpose_y = var_6879_transpose_y_0, x = var_6877_cast_fp16, y = v_cast_fp16)[name = string("op_6879_cast_fp16")];
+            tensor<int32, [4]> var_6880 = const()[name = string("op_6880"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_705x = const()[name = string("concat_705x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6881_cast_fp16 = transpose(perm = var_6880, x = var_6879_cast_fp16)[name = string("transpose_385")];
+            tensor<fp16, [1, ?, 1280]> x_571_cast_fp16 = reshape(shape = concat_705x, x = var_6881_cast_fp16)[name = string("x_571_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6885_to_fp16 = const()[name = string("op_6885_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578013568)))];
+            tensor<fp16, [1280]> var_6886_to_fp16 = const()[name = string("op_6886_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581290432)))];
+            tensor<fp16, [1, ?, 1280]> linear_253_cast_fp16 = linear(bias = var_6886_to_fp16, weight = var_6885_to_fp16, x = x_571_cast_fp16)[name = string("linear_253_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_573_cast_fp16 = add(x = x_567_cast_fp16, y = linear_253_cast_fp16)[name = string("x_573_cast_fp16")];
+            tensor<int32, [1]> var_6893_axes_0 = const()[name = string("op_6893_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_mlp_ln_weight_to_fp16 = const()[name = string("blocks_31_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581293056)))];
+            tensor<fp16, [1280]> blocks_31_mlp_ln_bias_to_fp16 = const()[name = string("blocks_31_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581295680)))];
+            tensor<fp16, [1, ?, 1280]> var_6893_cast_fp16 = layer_norm(axes = var_6893_axes_0, beta = blocks_31_mlp_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_mlp_ln_weight_to_fp16, x = x_573_cast_fp16)[name = string("op_6893_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6902_to_fp16 = const()[name = string("op_6902_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581298304)))];
+            tensor<fp16, [5120]> var_6903_to_fp16 = const()[name = string("op_6903_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594405568)))];
+            tensor<fp16, [1, ?, 5120]> linear_254_cast_fp16 = linear(bias = var_6903_to_fp16, weight = var_6902_to_fp16, x = var_6893_cast_fp16)[name = string("linear_254_cast_fp16")];
+            string x_577_mode_0 = const()[name = string("x_577_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_577_cast_fp16 = gelu(mode = x_577_mode_0, x = linear_254_cast_fp16)[name = string("x_577_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6908_to_fp16 = const()[name = string("op_6908_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594415872)))];
+            tensor<fp16, [1280]> var_6909_to_fp16 = const()[name = string("op_6909_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607523136)))];
+            tensor<fp16, [1, ?, 1280]> linear_255_cast_fp16 = linear(bias = var_6909_to_fp16, weight = var_6908_to_fp16, x = x_577_cast_fp16)[name = string("linear_255_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_579_cast_fp16 = add(x = x_573_cast_fp16, y = linear_255_cast_fp16)[name = string("x_579_cast_fp16")];
+            tensor<int32, [1]> var_6922_axes_0 = const()[name = string("op_6922_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607525760)))];
+            tensor<fp16, [1280]> ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607528384)))];
+            fp16 var_6913_to_fp16 = const()[name = string("op_6913_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6922_cast_fp16 = layer_norm(axes = var_6922_axes_0, beta = ln_bias_to_fp16, epsilon = var_6913_to_fp16, gamma = ln_weight_to_fp16, x = x_579_cast_fp16)[name = string("op_6922_cast_fp16")];
+            tensor<fp16, [51865]> var_6932_bias_0_to_fp16 = const()[name = string("op_6932_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607531008)))];
+            tensor<fp16, [1, ?, 51865]> logits = linear(bias = var_6932_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_6922_cast_fp16)[name = string("op_6932_cast_fp16")];
+        } -> (logits);
+}
\ No newline at end of file
diff --git a/large-v2/decoder_second.mlmodelc/weights/weight.bin b/large-v2/decoder_second.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e54c5f6f71d4b2bd99cc6c673ad772a28e036400
--- /dev/null
+++ b/large-v2/decoder_second.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf6ddb1f8892bfddf9f96b11c6b596934c1fe6c01839f81c06d8d2e094f19533
+size 1607634802
diff --git a/large-v2/encoder.mlmodelc/analytics/coremldata.bin b/large-v2/encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..be0828391dbe6d138ead9a5b8d3e876c437b9338
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abf4666de503da73cee5379b4dfa17d4a3f06bff3c0c8e310d0a0e1cf2554f87
+size 202
diff --git a/large-v2/encoder.mlmodelc/coremldata.bin b/large-v2/encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..11af75965ddba955d827d601c8bc9c740eccffef
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e2ae6d3a42a2ca10398635e0b210846dcbc24a31184c93f9302694163bcadaf
+size 196
diff --git a/large-v2/encoder.mlmodelc/metadata.json b/large-v2/encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..fc7e22714a55ee93a6218c260ec1c188f61696bc
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/metadata.json
@@ -0,0 +1,76 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 1280]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.add" : 65,
+      "Ios18.reshape" : 128,
+      "Ios18.linear" : 192,
+      "Ios18.gelu" : 34,
+      "Ios18.matmul" : 64,
+      "Ios18.transpose" : 129,
+      "Ios18.layerNorm" : 65,
+      "Ios18.conv" : 2,
+      "Ios18.cast" : 4,
+      "Ios18.softmax" : 32,
+      "Ios18.mul" : 64
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_pipeline",
+      "structure" : [
+        {
+          "name" : "MLModelType_mlProgram"
+        },
+        {
+          "name" : "MLModelType_mlProgram"
+        }
+      ]
+    },
+    "userDefinedMetadata" : {
+
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "chunked_pipeline",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/large-v2/encoder.mlmodelc/model0/analytics/coremldata.bin b/large-v2/encoder.mlmodelc/model0/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ed18ae44ab3d09ffbed846536c84109f12b19b1
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/model0/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a8281049b2a65a3be541cfd9f949e84b8fe1c5251ce90e46da1626fed54e58a
+size 108
diff --git a/large-v2/encoder.mlmodelc/model0/coremldata.bin b/large-v2/encoder.mlmodelc/model0/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7cff6275c851901c70c7730d7570ee9c01919f76
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/model0/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a30736ebb8641d231fc84aa2d3d05770adb9603bdca174d439416450827b75a
+size 200
diff --git a/large-v2/encoder.mlmodelc/model0/model.mil b/large-v2/encoder.mlmodelc/model0/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..dd22eae1181ca157d6dd8f1273f2dcb4977c1e88
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/model0/model.mil
@@ -0,0 +1,962 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 80, 3000]> logmel_data) {
+            string var_84_pad_type_0 = const()[name = string("op_84_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_84_pad_0 = const()[name = string("op_84_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_84_strides_0 = const()[name = string("op_84_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_84_dilations_0 = const()[name = string("op_84_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_84_groups_0 = const()[name = string("op_84_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 80, 3]> weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor<fp16, [1280, 80, 3]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1280]> bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614528)))];
+            tensor<fp16, [1, 1280, 3000]> var_84_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_84_dilations_0, groups = var_84_groups_0, pad = var_84_pad_0, pad_type = var_84_pad_type_0, strides = var_84_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_84_cast_fp16")];
+            string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1280, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_84_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_102_pad_type_0 = const()[name = string("op_102_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_102_pad_0 = const()[name = string("op_102_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_102_strides_0 = const()[name = string("op_102_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_102_dilations_0 = const()[name = string("op_102_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_102_groups_0 = const()[name = string("op_102_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 3]> weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor<fp16, [1280, 1280, 3]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617152)))];
+            tensor<fp16, [1280]> bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(10447616)))];
+            tensor<fp16, [1, 1280, 1500]> var_102_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_102_dilations_0, groups = var_102_groups_0, pad = var_102_pad_0, pad_type = var_102_pad_type_0, strides = var_102_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_102_cast_fp16")];
+            string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1280, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_102_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<int32, [3]> var_108 = const()[name = string("op_108"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [1500, 1280]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [1500, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(10450240)))];
+            tensor<fp16, [1, 1500, 1280]> x_5_cast_fp16 = transpose(perm = var_108, x = x_3_cast_fp16)[name = string("transpose_160")];
+            tensor<fp16, [1, 1500, 1280]> var_111_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_111_cast_fp16")];
+            int32 var_124 = const()[name = string("op_124"), val = int32(-1)];
+            tensor<int32, [1]> var_140_axes_0 = const()[name = string("op_140_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14290304)))];
+            tensor<fp16, [1280]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14292928)))];
+            fp16 var_130_to_fp16 = const()[name = string("op_130_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_140_cast_fp16 = layer_norm(axes = var_140_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_130_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_111_cast_fp16)[name = string("op_140_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14295552)))];
+            tensor<fp16, [1280]> var_152_to_fp16 = const()[name = string("op_152_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(17572416)))];
+            tensor<fp16, [1, 1500, 1280]> linear_0_cast_fp16 = linear(bias = var_152_to_fp16, weight = var_151_to_fp16, x = var_140_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_155_to_fp16 = const()[name = string("op_155_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(17575040)))];
+            tensor<fp16, [1280]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(20851904)))];
+            tensor<fp16, [1, 1500, 1280]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_155_to_fp16, x = var_140_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_159_to_fp16 = const()[name = string("op_159_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(20854528)))];
+            tensor<fp16, [1280]> var_160_to_fp16 = const()[name = string("op_160_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(24131392)))];
+            tensor<fp16, [1, 1500, 1280]> linear_2_cast_fp16 = linear(bias = var_160_to_fp16, weight = var_159_to_fp16, x = var_140_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_168 = const()[name = string("op_168"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_169_cast_fp16 = reshape(shape = var_168, x = linear_0_cast_fp16)[name = string("op_169_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_3_cast_fp16 = mul(x = var_169_cast_fp16, y = const_224_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> var_175 = const()[name = string("op_175"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_176_cast_fp16 = reshape(shape = var_175, x = linear_1_cast_fp16)[name = string("op_176_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_3_cast_fp16 = mul(x = var_176_cast_fp16, y = const_225_to_fp16)[name = string("k_3_cast_fp16")];
+            tensor<int32, [4]> var_182 = const()[name = string("op_182"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_183_cast_fp16 = reshape(shape = var_182, x = linear_2_cast_fp16)[name = string("op_183_cast_fp16")];
+            tensor<int32, [4]> var_184 = const()[name = string("op_184"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_65 = transpose(perm = transpose_65_perm_0, x = k_3_cast_fp16)[name = string("transpose_158")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_64 = transpose(perm = transpose_64_perm_0, x = q_3_cast_fp16)[name = string("transpose_159")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_1_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_188_cast_fp16 = softmax(axis = var_124, x = qk_1_cast_fp16)[name = string("op_188_cast_fp16")];
+            bool var_190_transpose_x_0 = const()[name = string("op_190_transpose_x_0"), val = bool(false)];
+            bool var_190_transpose_y_0 = const()[name = string("op_190_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_3_cast_fp16 = transpose(perm = var_184, x = var_183_cast_fp16)[name = string("transpose_157")];
+            tensor<fp16, [1, 20, 1500, 64]> var_190_cast_fp16 = matmul(transpose_x = var_190_transpose_x_0, transpose_y = var_190_transpose_y_0, x = var_188_cast_fp16, y = v_3_cast_fp16)[name = string("op_190_cast_fp16")];
+            tensor<int32, [4]> var_191 = const()[name = string("op_191"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_192_cast_fp16 = transpose(perm = var_191, x = var_190_cast_fp16)[name = string("transpose_156")];
+            tensor<fp16, [1, 1500, 1280]> x_11_cast_fp16 = reshape(shape = concat_0, x = var_192_cast_fp16)[name = string("x_11_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_196_to_fp16 = const()[name = string("op_196_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(24134016)))];
+            tensor<fp16, [1280]> var_197_to_fp16 = const()[name = string("op_197_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27410880)))];
+            tensor<fp16, [1, 1500, 1280]> linear_3_cast_fp16 = linear(bias = var_197_to_fp16, weight = var_196_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_13_cast_fp16 = add(x = var_111_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<int32, [1]> var_204_axes_0 = const()[name = string("op_204_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27413504)))];
+            tensor<fp16, [1280]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27416128)))];
+            tensor<fp16, [1, 1500, 1280]> var_204_cast_fp16 = layer_norm(axes = var_204_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_130_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_204_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27418752)))];
+            tensor<fp16, [5120]> var_214_to_fp16 = const()[name = string("op_214_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(40526016)))];
+            tensor<fp16, [1, 1500, 5120]> linear_4_cast_fp16 = linear(bias = var_214_to_fp16, weight = var_213_to_fp16, x = var_204_cast_fp16)[name = string("linear_4_cast_fp16")];
+            string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_219_to_fp16 = const()[name = string("op_219_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(40536320)))];
+            tensor<fp16, [1280]> var_220_to_fp16 = const()[name = string("op_220_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(53643584)))];
+            tensor<fp16, [1, 1500, 1280]> linear_5_cast_fp16 = linear(bias = var_220_to_fp16, weight = var_219_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")];
+            int32 var_230 = const()[name = string("op_230"), val = int32(-1)];
+            tensor<int32, [1]> var_246_axes_0 = const()[name = string("op_246_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(53646208)))];
+            tensor<fp16, [1280]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(53648832)))];
+            fp16 var_236_to_fp16 = const()[name = string("op_236_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_246_cast_fp16 = layer_norm(axes = var_246_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_236_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_246_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(53651456)))];
+            tensor<fp16, [1280]> var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(56928320)))];
+            tensor<fp16, [1, 1500, 1280]> linear_6_cast_fp16 = linear(bias = var_258_to_fp16, weight = var_257_to_fp16, x = var_246_cast_fp16)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_261_to_fp16 = const()[name = string("op_261_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(56930944)))];
+            tensor<fp16, [1, 1500, 1280]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_261_to_fp16, x = var_246_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(60207808)))];
+            tensor<fp16, [1280]> var_266_to_fp16 = const()[name = string("op_266_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(63484672)))];
+            tensor<fp16, [1, 1500, 1280]> linear_8_cast_fp16 = linear(bias = var_266_to_fp16, weight = var_265_to_fp16, x = var_246_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_274 = const()[name = string("op_274"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_275_cast_fp16 = reshape(shape = var_274, x = linear_6_cast_fp16)[name = string("op_275_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_226_to_fp16 = const()[name = string("const_226_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_7_cast_fp16 = mul(x = var_275_cast_fp16, y = const_226_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_281 = const()[name = string("op_281"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_282_cast_fp16 = reshape(shape = var_281, x = linear_7_cast_fp16)[name = string("op_282_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_7_cast_fp16 = mul(x = var_282_cast_fp16, y = const_227_to_fp16)[name = string("k_7_cast_fp16")];
+            tensor<int32, [4]> var_288 = const()[name = string("op_288"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_289_cast_fp16 = reshape(shape = var_288, x = linear_8_cast_fp16)[name = string("op_289_cast_fp16")];
+            tensor<int32, [4]> var_290 = const()[name = string("op_290"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)];
+            bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_67 = transpose(perm = transpose_67_perm_0, x = k_7_cast_fp16)[name = string("transpose_154")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_66 = transpose(perm = transpose_66_perm_0, x = q_7_cast_fp16)[name = string("transpose_155")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_294_cast_fp16 = softmax(axis = var_230, x = qk_3_cast_fp16)[name = string("op_294_cast_fp16")];
+            bool var_296_transpose_x_0 = const()[name = string("op_296_transpose_x_0"), val = bool(false)];
+            bool var_296_transpose_y_0 = const()[name = string("op_296_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_7_cast_fp16 = transpose(perm = var_290, x = var_289_cast_fp16)[name = string("transpose_153")];
+            tensor<fp16, [1, 20, 1500, 64]> var_296_cast_fp16 = matmul(transpose_x = var_296_transpose_x_0, transpose_y = var_296_transpose_y_0, x = var_294_cast_fp16, y = v_7_cast_fp16)[name = string("op_296_cast_fp16")];
+            tensor<int32, [4]> var_297 = const()[name = string("op_297"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_298_cast_fp16 = transpose(perm = var_297, x = var_296_cast_fp16)[name = string("transpose_152")];
+            tensor<fp16, [1, 1500, 1280]> x_23_cast_fp16 = reshape(shape = concat_1, x = var_298_cast_fp16)[name = string("x_23_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_302_to_fp16 = const()[name = string("op_302_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(63487296)))];
+            tensor<fp16, [1280]> var_303_to_fp16 = const()[name = string("op_303_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(66764160)))];
+            tensor<fp16, [1, 1500, 1280]> linear_9_cast_fp16 = linear(bias = var_303_to_fp16, weight = var_302_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<int32, [1]> var_310_axes_0 = const()[name = string("op_310_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(66766784)))];
+            tensor<fp16, [1280]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(66769408)))];
+            tensor<fp16, [1, 1500, 1280]> var_310_cast_fp16 = layer_norm(axes = var_310_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_236_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_310_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_319_to_fp16 = const()[name = string("op_319_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(66772032)))];
+            tensor<fp16, [5120]> var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(79879296)))];
+            tensor<fp16, [1, 1500, 5120]> linear_10_cast_fp16 = linear(bias = var_320_to_fp16, weight = var_319_to_fp16, x = var_310_cast_fp16)[name = string("linear_10_cast_fp16")];
+            string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_325_to_fp16 = const()[name = string("op_325_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(79889600)))];
+            tensor<fp16, [1280]> var_326_to_fp16 = const()[name = string("op_326_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(92996864)))];
+            tensor<fp16, [1, 1500, 1280]> linear_11_cast_fp16 = linear(bias = var_326_to_fp16, weight = var_325_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")];
+            int32 var_336 = const()[name = string("op_336"), val = int32(-1)];
+            tensor<int32, [1]> var_352_axes_0 = const()[name = string("op_352_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(92999488)))];
+            tensor<fp16, [1280]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93002112)))];
+            fp16 var_342_to_fp16 = const()[name = string("op_342_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_352_cast_fp16 = layer_norm(axes = var_352_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_342_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_352_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_363_to_fp16 = const()[name = string("op_363_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93004736)))];
+            tensor<fp16, [1280]> var_364_to_fp16 = const()[name = string("op_364_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(96281600)))];
+            tensor<fp16, [1, 1500, 1280]> linear_12_cast_fp16 = linear(bias = var_364_to_fp16, weight = var_363_to_fp16, x = var_352_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(96284224)))];
+            tensor<fp16, [1, 1500, 1280]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_367_to_fp16, x = var_352_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_371_to_fp16 = const()[name = string("op_371_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(99561088)))];
+            tensor<fp16, [1280]> var_372_to_fp16 = const()[name = string("op_372_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(102837952)))];
+            tensor<fp16, [1, 1500, 1280]> linear_14_cast_fp16 = linear(bias = var_372_to_fp16, weight = var_371_to_fp16, x = var_352_cast_fp16)[name = string("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_380 = const()[name = string("op_380"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_381_cast_fp16 = reshape(shape = var_380, x = linear_12_cast_fp16)[name = string("op_381_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_228_to_fp16 = const()[name = string("const_228_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_11_cast_fp16 = mul(x = var_381_cast_fp16, y = const_228_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> var_387 = const()[name = string("op_387"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_388_cast_fp16 = reshape(shape = var_387, x = linear_13_cast_fp16)[name = string("op_388_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_11_cast_fp16 = mul(x = var_388_cast_fp16, y = const_229_to_fp16)[name = string("k_11_cast_fp16")];
+            tensor<int32, [4]> var_394 = const()[name = string("op_394"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_395_cast_fp16 = reshape(shape = var_394, x = linear_14_cast_fp16)[name = string("op_395_cast_fp16")];
+            tensor<int32, [4]> var_396 = const()[name = string("op_396"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_69 = transpose(perm = transpose_69_perm_0, x = k_11_cast_fp16)[name = string("transpose_150")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_68 = transpose(perm = transpose_68_perm_0, x = q_11_cast_fp16)[name = string("transpose_151")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_400_cast_fp16 = softmax(axis = var_336, x = qk_5_cast_fp16)[name = string("op_400_cast_fp16")];
+            bool var_402_transpose_x_0 = const()[name = string("op_402_transpose_x_0"), val = bool(false)];
+            bool var_402_transpose_y_0 = const()[name = string("op_402_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_11_cast_fp16 = transpose(perm = var_396, x = var_395_cast_fp16)[name = string("transpose_149")];
+            tensor<fp16, [1, 20, 1500, 64]> var_402_cast_fp16 = matmul(transpose_x = var_402_transpose_x_0, transpose_y = var_402_transpose_y_0, x = var_400_cast_fp16, y = v_11_cast_fp16)[name = string("op_402_cast_fp16")];
+            tensor<int32, [4]> var_403 = const()[name = string("op_403"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_404_cast_fp16 = transpose(perm = var_403, x = var_402_cast_fp16)[name = string("transpose_148")];
+            tensor<fp16, [1, 1500, 1280]> x_35_cast_fp16 = reshape(shape = concat_2, x = var_404_cast_fp16)[name = string("x_35_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(102840576)))];
+            tensor<fp16, [1280]> var_409_to_fp16 = const()[name = string("op_409_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106117440)))];
+            tensor<fp16, [1, 1500, 1280]> linear_15_cast_fp16 = linear(bias = var_409_to_fp16, weight = var_408_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<int32, [1]> var_416_axes_0 = const()[name = string("op_416_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106120064)))];
+            tensor<fp16, [1280]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106122688)))];
+            tensor<fp16, [1, 1500, 1280]> var_416_cast_fp16 = layer_norm(axes = var_416_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_342_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_416_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_425_to_fp16 = const()[name = string("op_425_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106125312)))];
+            tensor<fp16, [5120]> var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(119232576)))];
+            tensor<fp16, [1, 1500, 5120]> linear_16_cast_fp16 = linear(bias = var_426_to_fp16, weight = var_425_to_fp16, x = var_416_cast_fp16)[name = string("linear_16_cast_fp16")];
+            string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_431_to_fp16 = const()[name = string("op_431_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(119242880)))];
+            tensor<fp16, [1280]> var_432_to_fp16 = const()[name = string("op_432_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132350144)))];
+            tensor<fp16, [1, 1500, 1280]> linear_17_cast_fp16 = linear(bias = var_432_to_fp16, weight = var_431_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")];
+            int32 var_442 = const()[name = string("op_442"), val = int32(-1)];
+            tensor<int32, [1]> var_458_axes_0 = const()[name = string("op_458_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132352768)))];
+            tensor<fp16, [1280]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132355392)))];
+            fp16 var_448_to_fp16 = const()[name = string("op_448_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_458_cast_fp16 = layer_norm(axes = var_458_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_448_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_458_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132358016)))];
+            tensor<fp16, [1280]> var_470_to_fp16 = const()[name = string("op_470_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(135634880)))];
+            tensor<fp16, [1, 1500, 1280]> linear_18_cast_fp16 = linear(bias = var_470_to_fp16, weight = var_469_to_fp16, x = var_458_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_473_to_fp16 = const()[name = string("op_473_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(135637504)))];
+            tensor<fp16, [1, 1500, 1280]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_473_to_fp16, x = var_458_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_477_to_fp16 = const()[name = string("op_477_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(138914368)))];
+            tensor<fp16, [1280]> var_478_to_fp16 = const()[name = string("op_478_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(142191232)))];
+            tensor<fp16, [1, 1500, 1280]> linear_20_cast_fp16 = linear(bias = var_478_to_fp16, weight = var_477_to_fp16, x = var_458_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_486 = const()[name = string("op_486"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_487_cast_fp16 = reshape(shape = var_486, x = linear_18_cast_fp16)[name = string("op_487_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_15_cast_fp16 = mul(x = var_487_cast_fp16, y = const_230_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_493 = const()[name = string("op_493"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_494_cast_fp16 = reshape(shape = var_493, x = linear_19_cast_fp16)[name = string("op_494_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_15_cast_fp16 = mul(x = var_494_cast_fp16, y = const_231_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> var_500 = const()[name = string("op_500"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_501_cast_fp16 = reshape(shape = var_500, x = linear_20_cast_fp16)[name = string("op_501_cast_fp16")];
+            tensor<int32, [4]> var_502 = const()[name = string("op_502"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_71 = transpose(perm = transpose_71_perm_0, x = k_15_cast_fp16)[name = string("transpose_146")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_70 = transpose(perm = transpose_70_perm_0, x = q_15_cast_fp16)[name = string("transpose_147")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_7_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_506_cast_fp16 = softmax(axis = var_442, x = qk_7_cast_fp16)[name = string("op_506_cast_fp16")];
+            bool var_508_transpose_x_0 = const()[name = string("op_508_transpose_x_0"), val = bool(false)];
+            bool var_508_transpose_y_0 = const()[name = string("op_508_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_15_cast_fp16 = transpose(perm = var_502, x = var_501_cast_fp16)[name = string("transpose_145")];
+            tensor<fp16, [1, 20, 1500, 64]> var_508_cast_fp16 = matmul(transpose_x = var_508_transpose_x_0, transpose_y = var_508_transpose_y_0, x = var_506_cast_fp16, y = v_15_cast_fp16)[name = string("op_508_cast_fp16")];
+            tensor<int32, [4]> var_509 = const()[name = string("op_509"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_510_cast_fp16 = transpose(perm = var_509, x = var_508_cast_fp16)[name = string("transpose_144")];
+            tensor<fp16, [1, 1500, 1280]> x_47_cast_fp16 = reshape(shape = concat_3, x = var_510_cast_fp16)[name = string("x_47_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_514_to_fp16 = const()[name = string("op_514_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(142193856)))];
+            tensor<fp16, [1280]> var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145470720)))];
+            tensor<fp16, [1, 1500, 1280]> linear_21_cast_fp16 = linear(bias = var_515_to_fp16, weight = var_514_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<int32, [1]> var_522_axes_0 = const()[name = string("op_522_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145473344)))];
+            tensor<fp16, [1280]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145475968)))];
+            tensor<fp16, [1, 1500, 1280]> var_522_cast_fp16 = layer_norm(axes = var_522_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_448_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_522_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145478592)))];
+            tensor<fp16, [5120]> var_532_to_fp16 = const()[name = string("op_532_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(158585856)))];
+            tensor<fp16, [1, 1500, 5120]> linear_22_cast_fp16 = linear(bias = var_532_to_fp16, weight = var_531_to_fp16, x = var_522_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_537_to_fp16 = const()[name = string("op_537_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(158596160)))];
+            tensor<fp16, [1280]> var_538_to_fp16 = const()[name = string("op_538_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(171703424)))];
+            tensor<fp16, [1, 1500, 1280]> linear_23_cast_fp16 = linear(bias = var_538_to_fp16, weight = var_537_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")];
+            int32 var_548 = const()[name = string("op_548"), val = int32(-1)];
+            tensor<int32, [1]> var_564_axes_0 = const()[name = string("op_564_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(171706048)))];
+            tensor<fp16, [1280]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(171708672)))];
+            fp16 var_554_to_fp16 = const()[name = string("op_554_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_564_cast_fp16 = layer_norm(axes = var_564_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_554_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_564_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_575_to_fp16 = const()[name = string("op_575_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(171711296)))];
+            tensor<fp16, [1280]> var_576_to_fp16 = const()[name = string("op_576_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(174988160)))];
+            tensor<fp16, [1, 1500, 1280]> linear_24_cast_fp16 = linear(bias = var_576_to_fp16, weight = var_575_to_fp16, x = var_564_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(174990784)))];
+            tensor<fp16, [1, 1500, 1280]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_579_to_fp16, x = var_564_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_583_to_fp16 = const()[name = string("op_583_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(178267648)))];
+            tensor<fp16, [1280]> var_584_to_fp16 = const()[name = string("op_584_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(181544512)))];
+            tensor<fp16, [1, 1500, 1280]> linear_26_cast_fp16 = linear(bias = var_584_to_fp16, weight = var_583_to_fp16, x = var_564_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [4]> var_592 = const()[name = string("op_592"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_593_cast_fp16 = reshape(shape = var_592, x = linear_24_cast_fp16)[name = string("op_593_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_19_cast_fp16 = mul(x = var_593_cast_fp16, y = const_232_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> var_599 = const()[name = string("op_599"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_600_cast_fp16 = reshape(shape = var_599, x = linear_25_cast_fp16)[name = string("op_600_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_19_cast_fp16 = mul(x = var_600_cast_fp16, y = const_233_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_606 = const()[name = string("op_606"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_607_cast_fp16 = reshape(shape = var_606, x = linear_26_cast_fp16)[name = string("op_607_cast_fp16")];
+            tensor<int32, [4]> var_608 = const()[name = string("op_608"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)];
+            bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_73 = transpose(perm = transpose_73_perm_0, x = k_19_cast_fp16)[name = string("transpose_142")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = q_19_cast_fp16)[name = string("transpose_143")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_72, y = transpose_73)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_612_cast_fp16 = softmax(axis = var_548, x = qk_9_cast_fp16)[name = string("op_612_cast_fp16")];
+            bool var_614_transpose_x_0 = const()[name = string("op_614_transpose_x_0"), val = bool(false)];
+            bool var_614_transpose_y_0 = const()[name = string("op_614_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_608, x = var_607_cast_fp16)[name = string("transpose_141")];
+            tensor<fp16, [1, 20, 1500, 64]> var_614_cast_fp16 = matmul(transpose_x = var_614_transpose_x_0, transpose_y = var_614_transpose_y_0, x = var_612_cast_fp16, y = v_19_cast_fp16)[name = string("op_614_cast_fp16")];
+            tensor<int32, [4]> var_615 = const()[name = string("op_615"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_4 = const()[name = string("concat_4"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_616_cast_fp16 = transpose(perm = var_615, x = var_614_cast_fp16)[name = string("transpose_140")];
+            tensor<fp16, [1, 1500, 1280]> x_59_cast_fp16 = reshape(shape = concat_4, x = var_616_cast_fp16)[name = string("x_59_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_620_to_fp16 = const()[name = string("op_620_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(181547136)))];
+            tensor<fp16, [1280]> var_621_to_fp16 = const()[name = string("op_621_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(184824000)))];
+            tensor<fp16, [1, 1500, 1280]> linear_27_cast_fp16 = linear(bias = var_621_to_fp16, weight = var_620_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<int32, [1]> var_628_axes_0 = const()[name = string("op_628_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(184826624)))];
+            tensor<fp16, [1280]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(184829248)))];
+            tensor<fp16, [1, 1500, 1280]> var_628_cast_fp16 = layer_norm(axes = var_628_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_554_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_628_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_637_to_fp16 = const()[name = string("op_637_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(184831872)))];
+            tensor<fp16, [5120]> var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(197939136)))];
+            tensor<fp16, [1, 1500, 5120]> linear_28_cast_fp16 = linear(bias = var_638_to_fp16, weight = var_637_to_fp16, x = var_628_cast_fp16)[name = string("linear_28_cast_fp16")];
+            string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_643_to_fp16 = const()[name = string("op_643_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(197949440)))];
+            tensor<fp16, [1280]> var_644_to_fp16 = const()[name = string("op_644_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211056704)))];
+            tensor<fp16, [1, 1500, 1280]> linear_29_cast_fp16 = linear(bias = var_644_to_fp16, weight = var_643_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")];
+            int32 var_654 = const()[name = string("op_654"), val = int32(-1)];
+            tensor<int32, [1]> var_670_axes_0 = const()[name = string("op_670_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211059328)))];
+            tensor<fp16, [1280]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211061952)))];
+            fp16 var_660_to_fp16 = const()[name = string("op_660_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_670_cast_fp16 = layer_norm(axes = var_670_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_660_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_670_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_681_to_fp16 = const()[name = string("op_681_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211064576)))];
+            tensor<fp16, [1280]> var_682_to_fp16 = const()[name = string("op_682_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(214341440)))];
+            tensor<fp16, [1, 1500, 1280]> linear_30_cast_fp16 = linear(bias = var_682_to_fp16, weight = var_681_to_fp16, x = var_670_cast_fp16)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_685_to_fp16 = const()[name = string("op_685_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(214344064)))];
+            tensor<fp16, [1, 1500, 1280]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_685_to_fp16, x = var_670_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_689_to_fp16 = const()[name = string("op_689_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(217620928)))];
+            tensor<fp16, [1280]> var_690_to_fp16 = const()[name = string("op_690_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(220897792)))];
+            tensor<fp16, [1, 1500, 1280]> linear_32_cast_fp16 = linear(bias = var_690_to_fp16, weight = var_689_to_fp16, x = var_670_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<int32, [4]> var_698 = const()[name = string("op_698"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_699_cast_fp16 = reshape(shape = var_698, x = linear_30_cast_fp16)[name = string("op_699_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_23_cast_fp16 = mul(x = var_699_cast_fp16, y = const_234_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_705 = const()[name = string("op_705"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_706_cast_fp16 = reshape(shape = var_705, x = linear_31_cast_fp16)[name = string("op_706_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_23_cast_fp16 = mul(x = var_706_cast_fp16, y = const_235_to_fp16)[name = string("k_23_cast_fp16")];
+            tensor<int32, [4]> var_712 = const()[name = string("op_712"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_713_cast_fp16 = reshape(shape = var_712, x = linear_32_cast_fp16)[name = string("op_713_cast_fp16")];
+            tensor<int32, [4]> var_714 = const()[name = string("op_714"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_75 = transpose(perm = transpose_75_perm_0, x = k_23_cast_fp16)[name = string("transpose_138")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = q_23_cast_fp16)[name = string("transpose_139")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_74, y = transpose_75)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_718_cast_fp16 = softmax(axis = var_654, x = qk_11_cast_fp16)[name = string("op_718_cast_fp16")];
+            bool var_720_transpose_x_0 = const()[name = string("op_720_transpose_x_0"), val = bool(false)];
+            bool var_720_transpose_y_0 = const()[name = string("op_720_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_23_cast_fp16 = transpose(perm = var_714, x = var_713_cast_fp16)[name = string("transpose_137")];
+            tensor<fp16, [1, 20, 1500, 64]> var_720_cast_fp16 = matmul(transpose_x = var_720_transpose_x_0, transpose_y = var_720_transpose_y_0, x = var_718_cast_fp16, y = v_23_cast_fp16)[name = string("op_720_cast_fp16")];
+            tensor<int32, [4]> var_721 = const()[name = string("op_721"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_722_cast_fp16 = transpose(perm = var_721, x = var_720_cast_fp16)[name = string("transpose_136")];
+            tensor<fp16, [1, 1500, 1280]> x_71_cast_fp16 = reshape(shape = concat_5, x = var_722_cast_fp16)[name = string("x_71_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_726_to_fp16 = const()[name = string("op_726_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(220900416)))];
+            tensor<fp16, [1280]> var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224177280)))];
+            tensor<fp16, [1, 1500, 1280]> linear_33_cast_fp16 = linear(bias = var_727_to_fp16, weight = var_726_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<int32, [1]> var_734_axes_0 = const()[name = string("op_734_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224179904)))];
+            tensor<fp16, [1280]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224182528)))];
+            tensor<fp16, [1, 1500, 1280]> var_734_cast_fp16 = layer_norm(axes = var_734_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_660_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_734_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224185152)))];
+            tensor<fp16, [5120]> var_744_to_fp16 = const()[name = string("op_744_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(237292416)))];
+            tensor<fp16, [1, 1500, 5120]> linear_34_cast_fp16 = linear(bias = var_744_to_fp16, weight = var_743_to_fp16, x = var_734_cast_fp16)[name = string("linear_34_cast_fp16")];
+            string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_749_to_fp16 = const()[name = string("op_749_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(237302720)))];
+            tensor<fp16, [1280]> var_750_to_fp16 = const()[name = string("op_750_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250409984)))];
+            tensor<fp16, [1, 1500, 1280]> linear_35_cast_fp16 = linear(bias = var_750_to_fp16, weight = var_749_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_79_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_79_cast_fp16")];
+            int32 var_760 = const()[name = string("op_760"), val = int32(-1)];
+            tensor<int32, [1]> var_776_axes_0 = const()[name = string("op_776_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250412608)))];
+            tensor<fp16, [1280]> blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250415232)))];
+            fp16 var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_776_cast_fp16 = layer_norm(axes = var_776_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_766_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast_fp16)[name = string("op_776_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_787_to_fp16 = const()[name = string("op_787_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250417856)))];
+            tensor<fp16, [1280]> var_788_to_fp16 = const()[name = string("op_788_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(253694720)))];
+            tensor<fp16, [1, 1500, 1280]> linear_36_cast_fp16 = linear(bias = var_788_to_fp16, weight = var_787_to_fp16, x = var_776_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_791_to_fp16 = const()[name = string("op_791_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(253697344)))];
+            tensor<fp16, [1, 1500, 1280]> linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_791_to_fp16, x = var_776_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(256974208)))];
+            tensor<fp16, [1280]> var_796_to_fp16 = const()[name = string("op_796_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(260251072)))];
+            tensor<fp16, [1, 1500, 1280]> linear_38_cast_fp16 = linear(bias = var_796_to_fp16, weight = var_795_to_fp16, x = var_776_cast_fp16)[name = string("linear_38_cast_fp16")];
+            tensor<int32, [4]> var_804 = const()[name = string("op_804"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_805_cast_fp16 = reshape(shape = var_804, x = linear_36_cast_fp16)[name = string("op_805_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_236_to_fp16 = const()[name = string("const_236_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_27_cast_fp16 = mul(x = var_805_cast_fp16, y = const_236_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> var_811 = const()[name = string("op_811"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_812_cast_fp16 = reshape(shape = var_811, x = linear_37_cast_fp16)[name = string("op_812_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_27_cast_fp16 = mul(x = var_812_cast_fp16, y = const_237_to_fp16)[name = string("k_27_cast_fp16")];
+            tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_819_cast_fp16 = reshape(shape = var_818, x = linear_38_cast_fp16)[name = string("op_819_cast_fp16")];
+            tensor<int32, [4]> var_820 = const()[name = string("op_820"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_77 = transpose(perm = transpose_77_perm_0, x = k_27_cast_fp16)[name = string("transpose_134")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = q_27_cast_fp16)[name = string("transpose_135")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_76, y = transpose_77)[name = string("qk_13_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_824_cast_fp16 = softmax(axis = var_760, x = qk_13_cast_fp16)[name = string("op_824_cast_fp16")];
+            bool var_826_transpose_x_0 = const()[name = string("op_826_transpose_x_0"), val = bool(false)];
+            bool var_826_transpose_y_0 = const()[name = string("op_826_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_27_cast_fp16 = transpose(perm = var_820, x = var_819_cast_fp16)[name = string("transpose_133")];
+            tensor<fp16, [1, 20, 1500, 64]> var_826_cast_fp16 = matmul(transpose_x = var_826_transpose_x_0, transpose_y = var_826_transpose_y_0, x = var_824_cast_fp16, y = v_27_cast_fp16)[name = string("op_826_cast_fp16")];
+            tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_6 = const()[name = string("concat_6"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_828_cast_fp16 = transpose(perm = var_827, x = var_826_cast_fp16)[name = string("transpose_132")];
+            tensor<fp16, [1, 1500, 1280]> x_83_cast_fp16 = reshape(shape = concat_6, x = var_828_cast_fp16)[name = string("x_83_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_832_to_fp16 = const()[name = string("op_832_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(260253696)))];
+            tensor<fp16, [1280]> var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263530560)))];
+            tensor<fp16, [1, 1500, 1280]> linear_39_cast_fp16 = linear(bias = var_833_to_fp16, weight = var_832_to_fp16, x = x_83_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_85_cast_fp16 = add(x = x_79_cast_fp16, y = linear_39_cast_fp16)[name = string("x_85_cast_fp16")];
+            tensor<int32, [1]> var_840_axes_0 = const()[name = string("op_840_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263533184)))];
+            tensor<fp16, [1280]> blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263535808)))];
+            tensor<fp16, [1, 1500, 1280]> var_840_cast_fp16 = layer_norm(axes = var_840_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_766_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast_fp16)[name = string("op_840_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263538432)))];
+            tensor<fp16, [5120]> var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(276645696)))];
+            tensor<fp16, [1, 1500, 5120]> linear_40_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = var_840_cast_fp16)[name = string("linear_40_cast_fp16")];
+            string x_89_mode_0 = const()[name = string("x_89_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_89_cast_fp16 = gelu(mode = x_89_mode_0, x = linear_40_cast_fp16)[name = string("x_89_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_855_to_fp16 = const()[name = string("op_855_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(276656000)))];
+            tensor<fp16, [1280]> var_856_to_fp16 = const()[name = string("op_856_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(289763264)))];
+            tensor<fp16, [1, 1500, 1280]> linear_41_cast_fp16 = linear(bias = var_856_to_fp16, weight = var_855_to_fp16, x = x_89_cast_fp16)[name = string("linear_41_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_91_cast_fp16 = add(x = x_85_cast_fp16, y = linear_41_cast_fp16)[name = string("x_91_cast_fp16")];
+            int32 var_866 = const()[name = string("op_866"), val = int32(-1)];
+            tensor<int32, [1]> var_882_axes_0 = const()[name = string("op_882_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(289765888)))];
+            tensor<fp16, [1280]> blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(289768512)))];
+            fp16 var_872_to_fp16 = const()[name = string("op_872_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_882_cast_fp16 = layer_norm(axes = var_882_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_872_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast_fp16)[name = string("op_882_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_893_to_fp16 = const()[name = string("op_893_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(289771136)))];
+            tensor<fp16, [1280]> var_894_to_fp16 = const()[name = string("op_894_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(293048000)))];
+            tensor<fp16, [1, 1500, 1280]> linear_42_cast_fp16 = linear(bias = var_894_to_fp16, weight = var_893_to_fp16, x = var_882_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(293050624)))];
+            tensor<fp16, [1, 1500, 1280]> linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_897_to_fp16, x = var_882_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(296327488)))];
+            tensor<fp16, [1280]> var_902_to_fp16 = const()[name = string("op_902_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(299604352)))];
+            tensor<fp16, [1, 1500, 1280]> linear_44_cast_fp16 = linear(bias = var_902_to_fp16, weight = var_901_to_fp16, x = var_882_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [4]> var_910 = const()[name = string("op_910"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_911_cast_fp16 = reshape(shape = var_910, x = linear_42_cast_fp16)[name = string("op_911_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_238_to_fp16 = const()[name = string("const_238_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_31_cast_fp16 = mul(x = var_911_cast_fp16, y = const_238_to_fp16)[name = string("q_31_cast_fp16")];
+            tensor<int32, [4]> var_917 = const()[name = string("op_917"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_918_cast_fp16 = reshape(shape = var_917, x = linear_43_cast_fp16)[name = string("op_918_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_239_to_fp16 = const()[name = string("const_239_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_31_cast_fp16 = mul(x = var_918_cast_fp16, y = const_239_to_fp16)[name = string("k_31_cast_fp16")];
+            tensor<int32, [4]> var_924 = const()[name = string("op_924"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_925_cast_fp16 = reshape(shape = var_924, x = linear_44_cast_fp16)[name = string("op_925_cast_fp16")];
+            tensor<int32, [4]> var_926 = const()[name = string("op_926"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_15_transpose_x_0 = const()[name = string("qk_15_transpose_x_0"), val = bool(false)];
+            bool qk_15_transpose_y_0 = const()[name = string("qk_15_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_79 = transpose(perm = transpose_79_perm_0, x = k_31_cast_fp16)[name = string("transpose_130")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = q_31_cast_fp16)[name = string("transpose_131")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_15_cast_fp16 = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_78, y = transpose_79)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_930_cast_fp16 = softmax(axis = var_866, x = qk_15_cast_fp16)[name = string("op_930_cast_fp16")];
+            bool var_932_transpose_x_0 = const()[name = string("op_932_transpose_x_0"), val = bool(false)];
+            bool var_932_transpose_y_0 = const()[name = string("op_932_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_31_cast_fp16 = transpose(perm = var_926, x = var_925_cast_fp16)[name = string("transpose_129")];
+            tensor<fp16, [1, 20, 1500, 64]> var_932_cast_fp16 = matmul(transpose_x = var_932_transpose_x_0, transpose_y = var_932_transpose_y_0, x = var_930_cast_fp16, y = v_31_cast_fp16)[name = string("op_932_cast_fp16")];
+            tensor<int32, [4]> var_933 = const()[name = string("op_933"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_7 = const()[name = string("concat_7"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_934_cast_fp16 = transpose(perm = var_933, x = var_932_cast_fp16)[name = string("transpose_128")];
+            tensor<fp16, [1, 1500, 1280]> x_95_cast_fp16 = reshape(shape = concat_7, x = var_934_cast_fp16)[name = string("x_95_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_938_to_fp16 = const()[name = string("op_938_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(299606976)))];
+            tensor<fp16, [1280]> var_939_to_fp16 = const()[name = string("op_939_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(302883840)))];
+            tensor<fp16, [1, 1500, 1280]> linear_45_cast_fp16 = linear(bias = var_939_to_fp16, weight = var_938_to_fp16, x = x_95_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_97_cast_fp16 = add(x = x_91_cast_fp16, y = linear_45_cast_fp16)[name = string("x_97_cast_fp16")];
+            tensor<int32, [1]> var_946_axes_0 = const()[name = string("op_946_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(302886464)))];
+            tensor<fp16, [1280]> blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(302889088)))];
+            tensor<fp16, [1, 1500, 1280]> var_946_cast_fp16 = layer_norm(axes = var_946_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_872_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast_fp16)[name = string("op_946_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_955_to_fp16 = const()[name = string("op_955_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(302891712)))];
+            tensor<fp16, [5120]> var_956_to_fp16 = const()[name = string("op_956_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(315998976)))];
+            tensor<fp16, [1, 1500, 5120]> linear_46_cast_fp16 = linear(bias = var_956_to_fp16, weight = var_955_to_fp16, x = var_946_cast_fp16)[name = string("linear_46_cast_fp16")];
+            string x_101_mode_0 = const()[name = string("x_101_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_101_cast_fp16 = gelu(mode = x_101_mode_0, x = linear_46_cast_fp16)[name = string("x_101_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_961_to_fp16 = const()[name = string("op_961_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(316009280)))];
+            tensor<fp16, [1280]> var_962_to_fp16 = const()[name = string("op_962_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329116544)))];
+            tensor<fp16, [1, 1500, 1280]> linear_47_cast_fp16 = linear(bias = var_962_to_fp16, weight = var_961_to_fp16, x = x_101_cast_fp16)[name = string("linear_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_47_cast_fp16)[name = string("x_103_cast_fp16")];
+            int32 var_972 = const()[name = string("op_972"), val = int32(-1)];
+            tensor<int32, [1]> var_988_axes_0 = const()[name = string("op_988_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329119168)))];
+            tensor<fp16, [1280]> blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329121792)))];
+            fp16 var_978_to_fp16 = const()[name = string("op_978_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_988_cast_fp16 = layer_norm(axes = var_988_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_978_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast_fp16)[name = string("op_988_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_999_to_fp16 = const()[name = string("op_999_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329124416)))];
+            tensor<fp16, [1280]> var_1000_to_fp16 = const()[name = string("op_1000_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(332401280)))];
+            tensor<fp16, [1, 1500, 1280]> linear_48_cast_fp16 = linear(bias = var_1000_to_fp16, weight = var_999_to_fp16, x = var_988_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(332403904)))];
+            tensor<fp16, [1, 1500, 1280]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1003_to_fp16, x = var_988_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1007_to_fp16 = const()[name = string("op_1007_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(335680768)))];
+            tensor<fp16, [1280]> var_1008_to_fp16 = const()[name = string("op_1008_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(338957632)))];
+            tensor<fp16, [1, 1500, 1280]> linear_50_cast_fp16 = linear(bias = var_1008_to_fp16, weight = var_1007_to_fp16, x = var_988_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [4]> var_1016 = const()[name = string("op_1016"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1017_cast_fp16 = reshape(shape = var_1016, x = linear_48_cast_fp16)[name = string("op_1017_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_35_cast_fp16 = mul(x = var_1017_cast_fp16, y = const_240_to_fp16)[name = string("q_35_cast_fp16")];
+            tensor<int32, [4]> var_1023 = const()[name = string("op_1023"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1024_cast_fp16 = reshape(shape = var_1023, x = linear_49_cast_fp16)[name = string("op_1024_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_35_cast_fp16 = mul(x = var_1024_cast_fp16, y = const_241_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> var_1030 = const()[name = string("op_1030"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1031_cast_fp16 = reshape(shape = var_1030, x = linear_50_cast_fp16)[name = string("op_1031_cast_fp16")];
+            tensor<int32, [4]> var_1032 = const()[name = string("op_1032"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_81 = transpose(perm = transpose_81_perm_0, x = k_35_cast_fp16)[name = string("transpose_126")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = q_35_cast_fp16)[name = string("transpose_127")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_80, y = transpose_81)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1036_cast_fp16 = softmax(axis = var_972, x = qk_17_cast_fp16)[name = string("op_1036_cast_fp16")];
+            bool var_1038_transpose_x_0 = const()[name = string("op_1038_transpose_x_0"), val = bool(false)];
+            bool var_1038_transpose_y_0 = const()[name = string("op_1038_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_35_cast_fp16 = transpose(perm = var_1032, x = var_1031_cast_fp16)[name = string("transpose_125")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1038_cast_fp16 = matmul(transpose_x = var_1038_transpose_x_0, transpose_y = var_1038_transpose_y_0, x = var_1036_cast_fp16, y = v_35_cast_fp16)[name = string("op_1038_cast_fp16")];
+            tensor<int32, [4]> var_1039 = const()[name = string("op_1039"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1040_cast_fp16 = transpose(perm = var_1039, x = var_1038_cast_fp16)[name = string("transpose_124")];
+            tensor<fp16, [1, 1500, 1280]> x_107_cast_fp16 = reshape(shape = concat_8, x = var_1040_cast_fp16)[name = string("x_107_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(338960256)))];
+            tensor<fp16, [1280]> var_1045_to_fp16 = const()[name = string("op_1045_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342237120)))];
+            tensor<fp16, [1, 1500, 1280]> linear_51_cast_fp16 = linear(bias = var_1045_to_fp16, weight = var_1044_to_fp16, x = x_107_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_109_cast_fp16 = add(x = x_103_cast_fp16, y = linear_51_cast_fp16)[name = string("x_109_cast_fp16")];
+            tensor<int32, [1]> var_1052_axes_0 = const()[name = string("op_1052_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342239744)))];
+            tensor<fp16, [1280]> blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342242368)))];
+            tensor<fp16, [1, 1500, 1280]> var_1052_cast_fp16 = layer_norm(axes = var_1052_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_978_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast_fp16)[name = string("op_1052_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342244992)))];
+            tensor<fp16, [5120]> var_1062_to_fp16 = const()[name = string("op_1062_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(355352256)))];
+            tensor<fp16, [1, 1500, 5120]> linear_52_cast_fp16 = linear(bias = var_1062_to_fp16, weight = var_1061_to_fp16, x = var_1052_cast_fp16)[name = string("linear_52_cast_fp16")];
+            string x_113_mode_0 = const()[name = string("x_113_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_113_cast_fp16 = gelu(mode = x_113_mode_0, x = linear_52_cast_fp16)[name = string("x_113_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1067_to_fp16 = const()[name = string("op_1067_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(355362560)))];
+            tensor<fp16, [1280]> var_1068_to_fp16 = const()[name = string("op_1068_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368469824)))];
+            tensor<fp16, [1, 1500, 1280]> linear_53_cast_fp16 = linear(bias = var_1068_to_fp16, weight = var_1067_to_fp16, x = x_113_cast_fp16)[name = string("linear_53_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_115_cast_fp16 = add(x = x_109_cast_fp16, y = linear_53_cast_fp16)[name = string("x_115_cast_fp16")];
+            int32 var_1078 = const()[name = string("op_1078"), val = int32(-1)];
+            tensor<int32, [1]> var_1094_axes_0 = const()[name = string("op_1094_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368472448)))];
+            tensor<fp16, [1280]> blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368475072)))];
+            fp16 var_1084_to_fp16 = const()[name = string("op_1084_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1094_cast_fp16 = layer_norm(axes = var_1094_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1084_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast_fp16)[name = string("op_1094_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1105_to_fp16 = const()[name = string("op_1105_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368477696)))];
+            tensor<fp16, [1280]> var_1106_to_fp16 = const()[name = string("op_1106_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(371754560)))];
+            tensor<fp16, [1, 1500, 1280]> linear_54_cast_fp16 = linear(bias = var_1106_to_fp16, weight = var_1105_to_fp16, x = var_1094_cast_fp16)[name = string("linear_54_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(371757184)))];
+            tensor<fp16, [1, 1500, 1280]> linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1109_to_fp16, x = var_1094_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1113_to_fp16 = const()[name = string("op_1113_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(375034048)))];
+            tensor<fp16, [1280]> var_1114_to_fp16 = const()[name = string("op_1114_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(378310912)))];
+            tensor<fp16, [1, 1500, 1280]> linear_56_cast_fp16 = linear(bias = var_1114_to_fp16, weight = var_1113_to_fp16, x = var_1094_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<int32, [4]> var_1122 = const()[name = string("op_1122"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1123_cast_fp16 = reshape(shape = var_1122, x = linear_54_cast_fp16)[name = string("op_1123_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_39_cast_fp16 = mul(x = var_1123_cast_fp16, y = const_242_to_fp16)[name = string("q_39_cast_fp16")];
+            tensor<int32, [4]> var_1129 = const()[name = string("op_1129"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1130_cast_fp16 = reshape(shape = var_1129, x = linear_55_cast_fp16)[name = string("op_1130_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_39_cast_fp16 = mul(x = var_1130_cast_fp16, y = const_243_to_fp16)[name = string("k_39_cast_fp16")];
+            tensor<int32, [4]> var_1136 = const()[name = string("op_1136"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1137_cast_fp16 = reshape(shape = var_1136, x = linear_56_cast_fp16)[name = string("op_1137_cast_fp16")];
+            tensor<int32, [4]> var_1138 = const()[name = string("op_1138"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_83 = transpose(perm = transpose_83_perm_0, x = k_39_cast_fp16)[name = string("transpose_122")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = q_39_cast_fp16)[name = string("transpose_123")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_82, y = transpose_83)[name = string("qk_19_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1142_cast_fp16 = softmax(axis = var_1078, x = qk_19_cast_fp16)[name = string("op_1142_cast_fp16")];
+            bool var_1144_transpose_x_0 = const()[name = string("op_1144_transpose_x_0"), val = bool(false)];
+            bool var_1144_transpose_y_0 = const()[name = string("op_1144_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_1138, x = var_1137_cast_fp16)[name = string("transpose_121")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1144_cast_fp16 = matmul(transpose_x = var_1144_transpose_x_0, transpose_y = var_1144_transpose_y_0, x = var_1142_cast_fp16, y = v_39_cast_fp16)[name = string("op_1144_cast_fp16")];
+            tensor<int32, [4]> var_1145 = const()[name = string("op_1145"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_9 = const()[name = string("concat_9"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1146_cast_fp16 = transpose(perm = var_1145, x = var_1144_cast_fp16)[name = string("transpose_120")];
+            tensor<fp16, [1, 1500, 1280]> x_119_cast_fp16 = reshape(shape = concat_9, x = var_1146_cast_fp16)[name = string("x_119_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1150_to_fp16 = const()[name = string("op_1150_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(378313536)))];
+            tensor<fp16, [1280]> var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381590400)))];
+            tensor<fp16, [1, 1500, 1280]> linear_57_cast_fp16 = linear(bias = var_1151_to_fp16, weight = var_1150_to_fp16, x = x_119_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_121_cast_fp16 = add(x = x_115_cast_fp16, y = linear_57_cast_fp16)[name = string("x_121_cast_fp16")];
+            tensor<int32, [1]> var_1158_axes_0 = const()[name = string("op_1158_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381593024)))];
+            tensor<fp16, [1280]> blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381595648)))];
+            tensor<fp16, [1, 1500, 1280]> var_1158_cast_fp16 = layer_norm(axes = var_1158_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1084_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast_fp16)[name = string("op_1158_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1167_to_fp16 = const()[name = string("op_1167_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381598272)))];
+            tensor<fp16, [5120]> var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(394705536)))];
+            tensor<fp16, [1, 1500, 5120]> linear_58_cast_fp16 = linear(bias = var_1168_to_fp16, weight = var_1167_to_fp16, x = var_1158_cast_fp16)[name = string("linear_58_cast_fp16")];
+            string x_125_mode_0 = const()[name = string("x_125_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = linear_58_cast_fp16)[name = string("x_125_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(394715840)))];
+            tensor<fp16, [1280]> var_1174_to_fp16 = const()[name = string("op_1174_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(407823104)))];
+            tensor<fp16, [1, 1500, 1280]> linear_59_cast_fp16 = linear(bias = var_1174_to_fp16, weight = var_1173_to_fp16, x = x_125_cast_fp16)[name = string("linear_59_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_127_cast_fp16 = add(x = x_121_cast_fp16, y = linear_59_cast_fp16)[name = string("x_127_cast_fp16")];
+            int32 var_1184 = const()[name = string("op_1184"), val = int32(-1)];
+            tensor<int32, [1]> var_1200_axes_0 = const()[name = string("op_1200_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(407825728)))];
+            tensor<fp16, [1280]> blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(407828352)))];
+            fp16 var_1190_to_fp16 = const()[name = string("op_1190_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1200_cast_fp16 = layer_norm(axes = var_1200_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1190_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast_fp16)[name = string("op_1200_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(407830976)))];
+            tensor<fp16, [1280]> var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(411107840)))];
+            tensor<fp16, [1, 1500, 1280]> linear_60_cast_fp16 = linear(bias = var_1212_to_fp16, weight = var_1211_to_fp16, x = var_1200_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1215_to_fp16 = const()[name = string("op_1215_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(411110464)))];
+            tensor<fp16, [1, 1500, 1280]> linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1215_to_fp16, x = var_1200_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1219_to_fp16 = const()[name = string("op_1219_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(414387328)))];
+            tensor<fp16, [1280]> var_1220_to_fp16 = const()[name = string("op_1220_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(417664192)))];
+            tensor<fp16, [1, 1500, 1280]> linear_62_cast_fp16 = linear(bias = var_1220_to_fp16, weight = var_1219_to_fp16, x = var_1200_cast_fp16)[name = string("linear_62_cast_fp16")];
+            tensor<int32, [4]> var_1228 = const()[name = string("op_1228"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1229_cast_fp16 = reshape(shape = var_1228, x = linear_60_cast_fp16)[name = string("op_1229_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_43_cast_fp16 = mul(x = var_1229_cast_fp16, y = const_244_to_fp16)[name = string("q_43_cast_fp16")];
+            tensor<int32, [4]> var_1235 = const()[name = string("op_1235"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1236_cast_fp16 = reshape(shape = var_1235, x = linear_61_cast_fp16)[name = string("op_1236_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_43_cast_fp16 = mul(x = var_1236_cast_fp16, y = const_245_to_fp16)[name = string("k_43_cast_fp16")];
+            tensor<int32, [4]> var_1242 = const()[name = string("op_1242"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1243_cast_fp16 = reshape(shape = var_1242, x = linear_62_cast_fp16)[name = string("op_1243_cast_fp16")];
+            tensor<int32, [4]> var_1244 = const()[name = string("op_1244"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_21_transpose_x_0 = const()[name = string("qk_21_transpose_x_0"), val = bool(false)];
+            bool qk_21_transpose_y_0 = const()[name = string("qk_21_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_85 = transpose(perm = transpose_85_perm_0, x = k_43_cast_fp16)[name = string("transpose_118")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = q_43_cast_fp16)[name = string("transpose_119")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_21_cast_fp16 = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_84, y = transpose_85)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1248_cast_fp16 = softmax(axis = var_1184, x = qk_21_cast_fp16)[name = string("op_1248_cast_fp16")];
+            bool var_1250_transpose_x_0 = const()[name = string("op_1250_transpose_x_0"), val = bool(false)];
+            bool var_1250_transpose_y_0 = const()[name = string("op_1250_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_43_cast_fp16 = transpose(perm = var_1244, x = var_1243_cast_fp16)[name = string("transpose_117")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1250_cast_fp16 = matmul(transpose_x = var_1250_transpose_x_0, transpose_y = var_1250_transpose_y_0, x = var_1248_cast_fp16, y = v_43_cast_fp16)[name = string("op_1250_cast_fp16")];
+            tensor<int32, [4]> var_1251 = const()[name = string("op_1251"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_10 = const()[name = string("concat_10"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1252_cast_fp16 = transpose(perm = var_1251, x = var_1250_cast_fp16)[name = string("transpose_116")];
+            tensor<fp16, [1, 1500, 1280]> x_131_cast_fp16 = reshape(shape = concat_10, x = var_1252_cast_fp16)[name = string("x_131_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1256_to_fp16 = const()[name = string("op_1256_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(417666816)))];
+            tensor<fp16, [1280]> var_1257_to_fp16 = const()[name = string("op_1257_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(420943680)))];
+            tensor<fp16, [1, 1500, 1280]> linear_63_cast_fp16 = linear(bias = var_1257_to_fp16, weight = var_1256_to_fp16, x = x_131_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_133_cast_fp16 = add(x = x_127_cast_fp16, y = linear_63_cast_fp16)[name = string("x_133_cast_fp16")];
+            tensor<int32, [1]> var_1264_axes_0 = const()[name = string("op_1264_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(420946304)))];
+            tensor<fp16, [1280]> blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(420948928)))];
+            tensor<fp16, [1, 1500, 1280]> var_1264_cast_fp16 = layer_norm(axes = var_1264_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1190_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast_fp16)[name = string("op_1264_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(420951552)))];
+            tensor<fp16, [5120]> var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(434058816)))];
+            tensor<fp16, [1, 1500, 5120]> linear_64_cast_fp16 = linear(bias = var_1274_to_fp16, weight = var_1273_to_fp16, x = var_1264_cast_fp16)[name = string("linear_64_cast_fp16")];
+            string x_137_mode_0 = const()[name = string("x_137_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_137_cast_fp16 = gelu(mode = x_137_mode_0, x = linear_64_cast_fp16)[name = string("x_137_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1279_to_fp16 = const()[name = string("op_1279_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(434069120)))];
+            tensor<fp16, [1280]> var_1280_to_fp16 = const()[name = string("op_1280_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447176384)))];
+            tensor<fp16, [1, 1500, 1280]> linear_65_cast_fp16 = linear(bias = var_1280_to_fp16, weight = var_1279_to_fp16, x = x_137_cast_fp16)[name = string("linear_65_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_139_cast_fp16 = add(x = x_133_cast_fp16, y = linear_65_cast_fp16)[name = string("x_139_cast_fp16")];
+            int32 var_1290 = const()[name = string("op_1290"), val = int32(-1)];
+            tensor<int32, [1]> var_1306_axes_0 = const()[name = string("op_1306_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447179008)))];
+            tensor<fp16, [1280]> blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447181632)))];
+            fp16 var_1296_to_fp16 = const()[name = string("op_1296_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1306_cast_fp16 = layer_norm(axes = var_1306_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1296_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast_fp16)[name = string("op_1306_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1317_to_fp16 = const()[name = string("op_1317_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447184256)))];
+            tensor<fp16, [1280]> var_1318_to_fp16 = const()[name = string("op_1318_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(450461120)))];
+            tensor<fp16, [1, 1500, 1280]> linear_66_cast_fp16 = linear(bias = var_1318_to_fp16, weight = var_1317_to_fp16, x = var_1306_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1321_to_fp16 = const()[name = string("op_1321_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(450463744)))];
+            tensor<fp16, [1, 1500, 1280]> linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1321_to_fp16, x = var_1306_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1325_to_fp16 = const()[name = string("op_1325_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(453740608)))];
+            tensor<fp16, [1280]> var_1326_to_fp16 = const()[name = string("op_1326_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(457017472)))];
+            tensor<fp16, [1, 1500, 1280]> linear_68_cast_fp16 = linear(bias = var_1326_to_fp16, weight = var_1325_to_fp16, x = var_1306_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [4]> var_1334 = const()[name = string("op_1334"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1335_cast_fp16 = reshape(shape = var_1334, x = linear_66_cast_fp16)[name = string("op_1335_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_47_cast_fp16 = mul(x = var_1335_cast_fp16, y = const_246_to_fp16)[name = string("q_47_cast_fp16")];
+            tensor<int32, [4]> var_1341 = const()[name = string("op_1341"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1342_cast_fp16 = reshape(shape = var_1341, x = linear_67_cast_fp16)[name = string("op_1342_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_47_cast_fp16 = mul(x = var_1342_cast_fp16, y = const_247_to_fp16)[name = string("k_47_cast_fp16")];
+            tensor<int32, [4]> var_1348 = const()[name = string("op_1348"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1349_cast_fp16 = reshape(shape = var_1348, x = linear_68_cast_fp16)[name = string("op_1349_cast_fp16")];
+            tensor<int32, [4]> var_1350 = const()[name = string("op_1350"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)];
+            bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_87 = transpose(perm = transpose_87_perm_0, x = k_47_cast_fp16)[name = string("transpose_114")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = q_47_cast_fp16)[name = string("transpose_115")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_86, y = transpose_87)[name = string("qk_23_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1354_cast_fp16 = softmax(axis = var_1290, x = qk_23_cast_fp16)[name = string("op_1354_cast_fp16")];
+            bool var_1356_transpose_x_0 = const()[name = string("op_1356_transpose_x_0"), val = bool(false)];
+            bool var_1356_transpose_y_0 = const()[name = string("op_1356_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_47_cast_fp16 = transpose(perm = var_1350, x = var_1349_cast_fp16)[name = string("transpose_113")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1356_cast_fp16 = matmul(transpose_x = var_1356_transpose_x_0, transpose_y = var_1356_transpose_y_0, x = var_1354_cast_fp16, y = v_47_cast_fp16)[name = string("op_1356_cast_fp16")];
+            tensor<int32, [4]> var_1357 = const()[name = string("op_1357"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1358_cast_fp16 = transpose(perm = var_1357, x = var_1356_cast_fp16)[name = string("transpose_112")];
+            tensor<fp16, [1, 1500, 1280]> x_143_cast_fp16 = reshape(shape = concat_11, x = var_1358_cast_fp16)[name = string("x_143_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(457020096)))];
+            tensor<fp16, [1280]> var_1363_to_fp16 = const()[name = string("op_1363_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460296960)))];
+            tensor<fp16, [1, 1500, 1280]> linear_69_cast_fp16 = linear(bias = var_1363_to_fp16, weight = var_1362_to_fp16, x = x_143_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_145_cast_fp16 = add(x = x_139_cast_fp16, y = linear_69_cast_fp16)[name = string("x_145_cast_fp16")];
+            tensor<int32, [1]> var_1370_axes_0 = const()[name = string("op_1370_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460299584)))];
+            tensor<fp16, [1280]> blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460302208)))];
+            tensor<fp16, [1, 1500, 1280]> var_1370_cast_fp16 = layer_norm(axes = var_1370_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1296_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast_fp16)[name = string("op_1370_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1379_to_fp16 = const()[name = string("op_1379_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460304832)))];
+            tensor<fp16, [5120]> var_1380_to_fp16 = const()[name = string("op_1380_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(473412096)))];
+            tensor<fp16, [1, 1500, 5120]> linear_70_cast_fp16 = linear(bias = var_1380_to_fp16, weight = var_1379_to_fp16, x = var_1370_cast_fp16)[name = string("linear_70_cast_fp16")];
+            string x_149_mode_0 = const()[name = string("x_149_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_149_cast_fp16 = gelu(mode = x_149_mode_0, x = linear_70_cast_fp16)[name = string("x_149_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1385_to_fp16 = const()[name = string("op_1385_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(473422400)))];
+            tensor<fp16, [1280]> var_1386_to_fp16 = const()[name = string("op_1386_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486529664)))];
+            tensor<fp16, [1, 1500, 1280]> linear_71_cast_fp16 = linear(bias = var_1386_to_fp16, weight = var_1385_to_fp16, x = x_149_cast_fp16)[name = string("linear_71_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_151_cast_fp16 = add(x = x_145_cast_fp16, y = linear_71_cast_fp16)[name = string("x_151_cast_fp16")];
+            int32 var_1396 = const()[name = string("op_1396"), val = int32(-1)];
+            tensor<int32, [1]> var_1412_axes_0 = const()[name = string("op_1412_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486532288)))];
+            tensor<fp16, [1280]> blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486534912)))];
+            fp16 var_1402_to_fp16 = const()[name = string("op_1402_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1412_cast_fp16 = layer_norm(axes = var_1412_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_1402_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_151_cast_fp16)[name = string("op_1412_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486537536)))];
+            tensor<fp16, [1280]> var_1424_to_fp16 = const()[name = string("op_1424_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(489814400)))];
+            tensor<fp16, [1, 1500, 1280]> linear_72_cast_fp16 = linear(bias = var_1424_to_fp16, weight = var_1423_to_fp16, x = var_1412_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1427_to_fp16 = const()[name = string("op_1427_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(489817024)))];
+            tensor<fp16, [1, 1500, 1280]> linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1427_to_fp16, x = var_1412_cast_fp16)[name = string("linear_73_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1431_to_fp16 = const()[name = string("op_1431_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(493093888)))];
+            tensor<fp16, [1280]> var_1432_to_fp16 = const()[name = string("op_1432_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(496370752)))];
+            tensor<fp16, [1, 1500, 1280]> linear_74_cast_fp16 = linear(bias = var_1432_to_fp16, weight = var_1431_to_fp16, x = var_1412_cast_fp16)[name = string("linear_74_cast_fp16")];
+            tensor<int32, [4]> var_1440 = const()[name = string("op_1440"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1441_cast_fp16 = reshape(shape = var_1440, x = linear_72_cast_fp16)[name = string("op_1441_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_248_to_fp16 = const()[name = string("const_248_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_51_cast_fp16 = mul(x = var_1441_cast_fp16, y = const_248_to_fp16)[name = string("q_51_cast_fp16")];
+            tensor<int32, [4]> var_1447 = const()[name = string("op_1447"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1448_cast_fp16 = reshape(shape = var_1447, x = linear_73_cast_fp16)[name = string("op_1448_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_51_cast_fp16 = mul(x = var_1448_cast_fp16, y = const_249_to_fp16)[name = string("k_51_cast_fp16")];
+            tensor<int32, [4]> var_1454 = const()[name = string("op_1454"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1455_cast_fp16 = reshape(shape = var_1454, x = linear_74_cast_fp16)[name = string("op_1455_cast_fp16")];
+            tensor<int32, [4]> var_1456 = const()[name = string("op_1456"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)];
+            bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_89 = transpose(perm = transpose_89_perm_0, x = k_51_cast_fp16)[name = string("transpose_110")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = q_51_cast_fp16)[name = string("transpose_111")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_88, y = transpose_89)[name = string("qk_25_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1460_cast_fp16 = softmax(axis = var_1396, x = qk_25_cast_fp16)[name = string("op_1460_cast_fp16")];
+            bool var_1462_transpose_x_0 = const()[name = string("op_1462_transpose_x_0"), val = bool(false)];
+            bool var_1462_transpose_y_0 = const()[name = string("op_1462_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_51_cast_fp16 = transpose(perm = var_1456, x = var_1455_cast_fp16)[name = string("transpose_109")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1462_cast_fp16 = matmul(transpose_x = var_1462_transpose_x_0, transpose_y = var_1462_transpose_y_0, x = var_1460_cast_fp16, y = v_51_cast_fp16)[name = string("op_1462_cast_fp16")];
+            tensor<int32, [4]> var_1463 = const()[name = string("op_1463"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_12 = const()[name = string("concat_12"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1464_cast_fp16 = transpose(perm = var_1463, x = var_1462_cast_fp16)[name = string("transpose_108")];
+            tensor<fp16, [1, 1500, 1280]> x_155_cast_fp16 = reshape(shape = concat_12, x = var_1464_cast_fp16)[name = string("x_155_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1468_to_fp16 = const()[name = string("op_1468_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(496373376)))];
+            tensor<fp16, [1280]> var_1469_to_fp16 = const()[name = string("op_1469_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(499650240)))];
+            tensor<fp16, [1, 1500, 1280]> linear_75_cast_fp16 = linear(bias = var_1469_to_fp16, weight = var_1468_to_fp16, x = x_155_cast_fp16)[name = string("linear_75_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_157_cast_fp16 = add(x = x_151_cast_fp16, y = linear_75_cast_fp16)[name = string("x_157_cast_fp16")];
+            tensor<int32, [1]> var_1476_axes_0 = const()[name = string("op_1476_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(499652864)))];
+            tensor<fp16, [1280]> blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(499655488)))];
+            tensor<fp16, [1, 1500, 1280]> var_1476_cast_fp16 = layer_norm(axes = var_1476_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_1402_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_157_cast_fp16)[name = string("op_1476_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1485_to_fp16 = const()[name = string("op_1485_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(499658112)))];
+            tensor<fp16, [5120]> var_1486_to_fp16 = const()[name = string("op_1486_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(512765376)))];
+            tensor<fp16, [1, 1500, 5120]> linear_76_cast_fp16 = linear(bias = var_1486_to_fp16, weight = var_1485_to_fp16, x = var_1476_cast_fp16)[name = string("linear_76_cast_fp16")];
+            string x_161_mode_0 = const()[name = string("x_161_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_161_cast_fp16 = gelu(mode = x_161_mode_0, x = linear_76_cast_fp16)[name = string("x_161_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1491_to_fp16 = const()[name = string("op_1491_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(512775680)))];
+            tensor<fp16, [1280]> var_1492_to_fp16 = const()[name = string("op_1492_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(525882944)))];
+            tensor<fp16, [1, 1500, 1280]> linear_77_cast_fp16 = linear(bias = var_1492_to_fp16, weight = var_1491_to_fp16, x = x_161_cast_fp16)[name = string("linear_77_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_163_cast_fp16 = add(x = x_157_cast_fp16, y = linear_77_cast_fp16)[name = string("x_163_cast_fp16")];
+            int32 var_1502 = const()[name = string("op_1502"), val = int32(-1)];
+            tensor<int32, [1]> var_1518_axes_0 = const()[name = string("op_1518_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(525885568)))];
+            tensor<fp16, [1280]> blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(525888192)))];
+            fp16 var_1508_to_fp16 = const()[name = string("op_1508_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1518_cast_fp16 = layer_norm(axes = var_1518_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_1508_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_163_cast_fp16)[name = string("op_1518_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1529_to_fp16 = const()[name = string("op_1529_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(525890816)))];
+            tensor<fp16, [1280]> var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(529167680)))];
+            tensor<fp16, [1, 1500, 1280]> linear_78_cast_fp16 = linear(bias = var_1530_to_fp16, weight = var_1529_to_fp16, x = var_1518_cast_fp16)[name = string("linear_78_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1533_to_fp16 = const()[name = string("op_1533_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(529170304)))];
+            tensor<fp16, [1, 1500, 1280]> linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1533_to_fp16, x = var_1518_cast_fp16)[name = string("linear_79_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1537_to_fp16 = const()[name = string("op_1537_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(532447168)))];
+            tensor<fp16, [1280]> var_1538_to_fp16 = const()[name = string("op_1538_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(535724032)))];
+            tensor<fp16, [1, 1500, 1280]> linear_80_cast_fp16 = linear(bias = var_1538_to_fp16, weight = var_1537_to_fp16, x = var_1518_cast_fp16)[name = string("linear_80_cast_fp16")];
+            tensor<int32, [4]> var_1546 = const()[name = string("op_1546"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1547_cast_fp16 = reshape(shape = var_1546, x = linear_78_cast_fp16)[name = string("op_1547_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_55_cast_fp16 = mul(x = var_1547_cast_fp16, y = const_250_to_fp16)[name = string("q_55_cast_fp16")];
+            tensor<int32, [4]> var_1553 = const()[name = string("op_1553"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1554_cast_fp16 = reshape(shape = var_1553, x = linear_79_cast_fp16)[name = string("op_1554_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_55_cast_fp16 = mul(x = var_1554_cast_fp16, y = const_251_to_fp16)[name = string("k_55_cast_fp16")];
+            tensor<int32, [4]> var_1560 = const()[name = string("op_1560"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1561_cast_fp16 = reshape(shape = var_1560, x = linear_80_cast_fp16)[name = string("op_1561_cast_fp16")];
+            tensor<int32, [4]> var_1562 = const()[name = string("op_1562"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_27_transpose_x_0 = const()[name = string("qk_27_transpose_x_0"), val = bool(false)];
+            bool qk_27_transpose_y_0 = const()[name = string("qk_27_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_91 = transpose(perm = transpose_91_perm_0, x = k_55_cast_fp16)[name = string("transpose_106")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = q_55_cast_fp16)[name = string("transpose_107")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_27_cast_fp16 = matmul(transpose_x = qk_27_transpose_x_0, transpose_y = qk_27_transpose_y_0, x = transpose_90, y = transpose_91)[name = string("qk_27_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1566_cast_fp16 = softmax(axis = var_1502, x = qk_27_cast_fp16)[name = string("op_1566_cast_fp16")];
+            bool var_1568_transpose_x_0 = const()[name = string("op_1568_transpose_x_0"), val = bool(false)];
+            bool var_1568_transpose_y_0 = const()[name = string("op_1568_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_55_cast_fp16 = transpose(perm = var_1562, x = var_1561_cast_fp16)[name = string("transpose_105")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1568_cast_fp16 = matmul(transpose_x = var_1568_transpose_x_0, transpose_y = var_1568_transpose_y_0, x = var_1566_cast_fp16, y = v_55_cast_fp16)[name = string("op_1568_cast_fp16")];
+            tensor<int32, [4]> var_1569 = const()[name = string("op_1569"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_13 = const()[name = string("concat_13"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1570_cast_fp16 = transpose(perm = var_1569, x = var_1568_cast_fp16)[name = string("transpose_104")];
+            tensor<fp16, [1, 1500, 1280]> x_167_cast_fp16 = reshape(shape = concat_13, x = var_1570_cast_fp16)[name = string("x_167_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1574_to_fp16 = const()[name = string("op_1574_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(535726656)))];
+            tensor<fp16, [1280]> var_1575_to_fp16 = const()[name = string("op_1575_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539003520)))];
+            tensor<fp16, [1, 1500, 1280]> linear_81_cast_fp16 = linear(bias = var_1575_to_fp16, weight = var_1574_to_fp16, x = x_167_cast_fp16)[name = string("linear_81_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_169_cast_fp16 = add(x = x_163_cast_fp16, y = linear_81_cast_fp16)[name = string("x_169_cast_fp16")];
+            tensor<int32, [1]> var_1582_axes_0 = const()[name = string("op_1582_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539006144)))];
+            tensor<fp16, [1280]> blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539008768)))];
+            tensor<fp16, [1, 1500, 1280]> var_1582_cast_fp16 = layer_norm(axes = var_1582_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_1508_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_169_cast_fp16)[name = string("op_1582_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1591_to_fp16 = const()[name = string("op_1591_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539011392)))];
+            tensor<fp16, [5120]> var_1592_to_fp16 = const()[name = string("op_1592_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(552118656)))];
+            tensor<fp16, [1, 1500, 5120]> linear_82_cast_fp16 = linear(bias = var_1592_to_fp16, weight = var_1591_to_fp16, x = var_1582_cast_fp16)[name = string("linear_82_cast_fp16")];
+            string x_173_mode_0 = const()[name = string("x_173_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_173_cast_fp16 = gelu(mode = x_173_mode_0, x = linear_82_cast_fp16)[name = string("x_173_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1597_to_fp16 = const()[name = string("op_1597_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(552128960)))];
+            tensor<fp16, [1280]> var_1598_to_fp16 = const()[name = string("op_1598_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565236224)))];
+            tensor<fp16, [1, 1500, 1280]> linear_83_cast_fp16 = linear(bias = var_1598_to_fp16, weight = var_1597_to_fp16, x = x_173_cast_fp16)[name = string("linear_83_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_175_cast_fp16 = add(x = x_169_cast_fp16, y = linear_83_cast_fp16)[name = string("x_175_cast_fp16")];
+            int32 var_1608 = const()[name = string("op_1608"), val = int32(-1)];
+            tensor<int32, [1]> var_1624_axes_0 = const()[name = string("op_1624_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565238848)))];
+            tensor<fp16, [1280]> blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565241472)))];
+            fp16 var_1614_to_fp16 = const()[name = string("op_1614_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1624_cast_fp16 = layer_norm(axes = var_1624_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_1614_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_175_cast_fp16)[name = string("op_1624_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1635_to_fp16 = const()[name = string("op_1635_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565244096)))];
+            tensor<fp16, [1280]> var_1636_to_fp16 = const()[name = string("op_1636_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(568520960)))];
+            tensor<fp16, [1, 1500, 1280]> linear_84_cast_fp16 = linear(bias = var_1636_to_fp16, weight = var_1635_to_fp16, x = var_1624_cast_fp16)[name = string("linear_84_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1639_to_fp16 = const()[name = string("op_1639_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(568523584)))];
+            tensor<fp16, [1, 1500, 1280]> linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1639_to_fp16, x = var_1624_cast_fp16)[name = string("linear_85_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(571800448)))];
+            tensor<fp16, [1280]> var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(575077312)))];
+            tensor<fp16, [1, 1500, 1280]> linear_86_cast_fp16 = linear(bias = var_1644_to_fp16, weight = var_1643_to_fp16, x = var_1624_cast_fp16)[name = string("linear_86_cast_fp16")];
+            tensor<int32, [4]> var_1652 = const()[name = string("op_1652"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1653_cast_fp16 = reshape(shape = var_1652, x = linear_84_cast_fp16)[name = string("op_1653_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_59_cast_fp16 = mul(x = var_1653_cast_fp16, y = const_252_to_fp16)[name = string("q_59_cast_fp16")];
+            tensor<int32, [4]> var_1659 = const()[name = string("op_1659"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1660_cast_fp16 = reshape(shape = var_1659, x = linear_85_cast_fp16)[name = string("op_1660_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_59_cast_fp16 = mul(x = var_1660_cast_fp16, y = const_253_to_fp16)[name = string("k_59_cast_fp16")];
+            tensor<int32, [4]> var_1666 = const()[name = string("op_1666"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1667_cast_fp16 = reshape(shape = var_1666, x = linear_86_cast_fp16)[name = string("op_1667_cast_fp16")];
+            tensor<int32, [4]> var_1668 = const()[name = string("op_1668"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)];
+            bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_93 = transpose(perm = transpose_93_perm_0, x = k_59_cast_fp16)[name = string("transpose_102")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = q_59_cast_fp16)[name = string("transpose_103")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_92, y = transpose_93)[name = string("qk_29_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1672_cast_fp16 = softmax(axis = var_1608, x = qk_29_cast_fp16)[name = string("op_1672_cast_fp16")];
+            bool var_1674_transpose_x_0 = const()[name = string("op_1674_transpose_x_0"), val = bool(false)];
+            bool var_1674_transpose_y_0 = const()[name = string("op_1674_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_59_cast_fp16 = transpose(perm = var_1668, x = var_1667_cast_fp16)[name = string("transpose_101")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1674_cast_fp16 = matmul(transpose_x = var_1674_transpose_x_0, transpose_y = var_1674_transpose_y_0, x = var_1672_cast_fp16, y = v_59_cast_fp16)[name = string("op_1674_cast_fp16")];
+            tensor<int32, [4]> var_1675 = const()[name = string("op_1675"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1676_cast_fp16 = transpose(perm = var_1675, x = var_1674_cast_fp16)[name = string("transpose_100")];
+            tensor<fp16, [1, 1500, 1280]> x_179_cast_fp16 = reshape(shape = concat_14, x = var_1676_cast_fp16)[name = string("x_179_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1680_to_fp16 = const()[name = string("op_1680_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(575079936)))];
+            tensor<fp16, [1280]> var_1681_to_fp16 = const()[name = string("op_1681_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578356800)))];
+            tensor<fp16, [1, 1500, 1280]> linear_87_cast_fp16 = linear(bias = var_1681_to_fp16, weight = var_1680_to_fp16, x = x_179_cast_fp16)[name = string("linear_87_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_181_cast_fp16 = add(x = x_175_cast_fp16, y = linear_87_cast_fp16)[name = string("x_181_cast_fp16")];
+            tensor<int32, [1]> var_1688_axes_0 = const()[name = string("op_1688_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578359424)))];
+            tensor<fp16, [1280]> blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578362048)))];
+            tensor<fp16, [1, 1500, 1280]> var_1688_cast_fp16 = layer_norm(axes = var_1688_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_1614_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_181_cast_fp16)[name = string("op_1688_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1697_to_fp16 = const()[name = string("op_1697_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578364672)))];
+            tensor<fp16, [5120]> var_1698_to_fp16 = const()[name = string("op_1698_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(591471936)))];
+            tensor<fp16, [1, 1500, 5120]> linear_88_cast_fp16 = linear(bias = var_1698_to_fp16, weight = var_1697_to_fp16, x = var_1688_cast_fp16)[name = string("linear_88_cast_fp16")];
+            string x_185_mode_0 = const()[name = string("x_185_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_185_cast_fp16 = gelu(mode = x_185_mode_0, x = linear_88_cast_fp16)[name = string("x_185_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1703_to_fp16 = const()[name = string("op_1703_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(591482240)))];
+            tensor<fp16, [1280]> var_1704_to_fp16 = const()[name = string("op_1704_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604589504)))];
+            tensor<fp16, [1, 1500, 1280]> linear_89_cast_fp16 = linear(bias = var_1704_to_fp16, weight = var_1703_to_fp16, x = x_185_cast_fp16)[name = string("linear_89_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_187_cast_fp16 = add(x = x_181_cast_fp16, y = linear_89_cast_fp16)[name = string("x_187_cast_fp16")];
+            int32 var_1714 = const()[name = string("op_1714"), val = int32(-1)];
+            tensor<int32, [1]> var_1730_axes_0 = const()[name = string("op_1730_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604592128)))];
+            tensor<fp16, [1280]> blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604594752)))];
+            fp16 var_1720_to_fp16 = const()[name = string("op_1720_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1730_cast_fp16 = layer_norm(axes = var_1730_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_1720_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_187_cast_fp16)[name = string("op_1730_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604597376)))];
+            tensor<fp16, [1280]> var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(607874240)))];
+            tensor<fp16, [1, 1500, 1280]> linear_90_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = var_1730_cast_fp16)[name = string("linear_90_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1745_to_fp16 = const()[name = string("op_1745_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(607876864)))];
+            tensor<fp16, [1, 1500, 1280]> linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1745_to_fp16, x = var_1730_cast_fp16)[name = string("linear_91_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1749_to_fp16 = const()[name = string("op_1749_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(611153728)))];
+            tensor<fp16, [1280]> var_1750_to_fp16 = const()[name = string("op_1750_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614430592)))];
+            tensor<fp16, [1, 1500, 1280]> linear_92_cast_fp16 = linear(bias = var_1750_to_fp16, weight = var_1749_to_fp16, x = var_1730_cast_fp16)[name = string("linear_92_cast_fp16")];
+            tensor<int32, [4]> var_1758 = const()[name = string("op_1758"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1759_cast_fp16 = reshape(shape = var_1758, x = linear_90_cast_fp16)[name = string("op_1759_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_254_to_fp16 = const()[name = string("const_254_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_63_cast_fp16 = mul(x = var_1759_cast_fp16, y = const_254_to_fp16)[name = string("q_63_cast_fp16")];
+            tensor<int32, [4]> var_1765 = const()[name = string("op_1765"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1766_cast_fp16 = reshape(shape = var_1765, x = linear_91_cast_fp16)[name = string("op_1766_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_255_to_fp16 = const()[name = string("const_255_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_63_cast_fp16 = mul(x = var_1766_cast_fp16, y = const_255_to_fp16)[name = string("k_63_cast_fp16")];
+            tensor<int32, [4]> var_1772 = const()[name = string("op_1772"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1773_cast_fp16 = reshape(shape = var_1772, x = linear_92_cast_fp16)[name = string("op_1773_cast_fp16")];
+            tensor<int32, [4]> var_1774 = const()[name = string("op_1774"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)];
+            bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_95 = transpose(perm = transpose_95_perm_0, x = k_63_cast_fp16)[name = string("transpose_98")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = q_63_cast_fp16)[name = string("transpose_99")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_94, y = transpose_95)[name = string("qk_31_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1778_cast_fp16 = softmax(axis = var_1714, x = qk_31_cast_fp16)[name = string("op_1778_cast_fp16")];
+            bool var_1780_transpose_x_0 = const()[name = string("op_1780_transpose_x_0"), val = bool(false)];
+            bool var_1780_transpose_y_0 = const()[name = string("op_1780_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_63_cast_fp16 = transpose(perm = var_1774, x = var_1773_cast_fp16)[name = string("transpose_97")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1780_cast_fp16 = matmul(transpose_x = var_1780_transpose_x_0, transpose_y = var_1780_transpose_y_0, x = var_1778_cast_fp16, y = v_63_cast_fp16)[name = string("op_1780_cast_fp16")];
+            tensor<int32, [4]> var_1781 = const()[name = string("op_1781"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_15 = const()[name = string("concat_15"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1782_cast_fp16 = transpose(perm = var_1781, x = var_1780_cast_fp16)[name = string("transpose_96")];
+            tensor<fp16, [1, 1500, 1280]> x_191_cast_fp16 = reshape(shape = concat_15, x = var_1782_cast_fp16)[name = string("x_191_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1786_to_fp16 = const()[name = string("op_1786_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614433216)))];
+            tensor<fp16, [1280]> var_1787_to_fp16 = const()[name = string("op_1787_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617710080)))];
+            tensor<fp16, [1, 1500, 1280]> linear_93_cast_fp16 = linear(bias = var_1787_to_fp16, weight = var_1786_to_fp16, x = x_191_cast_fp16)[name = string("linear_93_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_193_cast_fp16_1 = add(x = x_187_cast_fp16, y = linear_93_cast_fp16)[name = string("x_193_cast_fp16")];
+            tensor<int32, [1]> var_1794_axes_0 = const()[name = string("op_1794_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617712704)))];
+            tensor<fp16, [1280]> blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617715328)))];
+            tensor<fp16, [1, 1500, 1280]> var_1794_cast_fp16 = layer_norm(axes = var_1794_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_1720_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_193_cast_fp16_1)[name = string("op_1794_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1803_to_fp16 = const()[name = string("op_1803_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617717952)))];
+            tensor<fp16, [5120]> var_1804_to_fp16 = const()[name = string("op_1804_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(630825216)))];
+            tensor<fp16, [1, 1500, 5120]> linear_94_cast_fp16 = linear(bias = var_1804_to_fp16, weight = var_1803_to_fp16, x = var_1794_cast_fp16)[name = string("linear_94_cast_fp16")];
+            string x_197_mode_0 = const()[name = string("x_197_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_197_cast_fp16 = gelu(mode = x_197_mode_0, x = linear_94_cast_fp16)[name = string("x_197_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1809_to_fp16 = const()[name = string("op_1809_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(630835520)))];
+            tensor<fp16, [1280]> var_1810_to_fp16 = const()[name = string("op_1810_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(643942784)))];
+            tensor<fp16, [1, 1500, 1280]> linear_95_cast_fp16_1 = linear(bias = var_1810_to_fp16, weight = var_1809_to_fp16, x = x_197_cast_fp16)[name = string("linear_95_cast_fp16")];
+            string x_193_cast_fp16_dtype_0 = const()[name = string("x_193_cast_fp16_dtype_0"), val = string("fp32")];
+            string linear_95_cast_fp16_dtype_0 = const()[name = string("linear_95_cast_fp16_dtype_0"), val = string("fp32")];
+            tensor<fp32, [1, 1500, 1280]> linear_95_cast_fp16 = cast(dtype = linear_95_cast_fp16_dtype_0, x = linear_95_cast_fp16_1)[name = string("cast_2")];
+            tensor<fp32, [1, 1500, 1280]> x_193_cast_fp16 = cast(dtype = x_193_cast_fp16_dtype_0, x = x_193_cast_fp16_1)[name = string("cast_3")];
+        } -> (x_193_cast_fp16, linear_95_cast_fp16);
+}
\ No newline at end of file
diff --git a/large-v2/encoder.mlmodelc/model0/weights/0-weight.bin b/large-v2/encoder.mlmodelc/model0/weights/0-weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4fbb183d75a322c46705714925320d8d872f2431
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/model0/weights/0-weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0d6315a62c6344e1bf4ac88f7f7c8408cc886645c98e8989f249229fd9e9c70
+size 643945408
diff --git a/large-v2/encoder.mlmodelc/model1/analytics/coremldata.bin b/large-v2/encoder.mlmodelc/model1/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ed18ae44ab3d09ffbed846536c84109f12b19b1
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/model1/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a8281049b2a65a3be541cfd9f949e84b8fe1c5251ce90e46da1626fed54e58a
+size 108
diff --git a/large-v2/encoder.mlmodelc/model1/coremldata.bin b/large-v2/encoder.mlmodelc/model1/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2ea59338ab416594015715ac6994e32a8c96e239
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/model1/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70195139816248a2b1fbef695f96decb60b35af6f364f84a7d2293a3d0a09e11
+size 196
diff --git a/large-v2/encoder.mlmodelc/model1/model.mil b/large-v2/encoder.mlmodelc/model1/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..7d3b026fa91fad416f7820629ab7ce05c46aad69
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/model1/model.mil
@@ -0,0 +1,945 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<fp32, [1, 1500, 1280]> linear_95_cast_fp16, tensor<fp32, [1, 1500, 1280]> x_193_cast_fp16) {
+            tensor<fp16, [1280]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(64)))];
+            string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("fp16")];
+            string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("fp16")];
+            tensor<fp16, [1, 1500, 1280]> cast_0 = cast(dtype = cast_0_dtype_0, x = linear_95_cast_fp16)[name = string("cast_0")];
+            tensor<fp16, [1, 1500, 1280]> cast_1 = cast(dtype = cast_1_dtype_0, x = x_193_cast_fp16)[name = string("cast_1")];
+            tensor<fp16, [1, 1500, 1280]> x_199_cast_fp16 = add(x = cast_1, y = cast_0)[name = string("x_199_cast_fp16")];
+            int32 var_1820 = const()[name = string("op_1820"), val = int32(-1)];
+            tensor<int32, [1]> var_1836_axes_0 = const()[name = string("op_1836_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(2688)))];
+            tensor<fp16, [1280]> blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(5312)))];
+            fp16 var_1826_to_fp16 = const()[name = string("op_1826_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1836_cast_fp16 = layer_norm(axes = var_1836_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_1826_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_199_cast_fp16)[name = string("op_1836_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1847_to_fp16 = const()[name = string("op_1847_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(7936)))];
+            tensor<fp16, [1280]> var_1848_to_fp16 = const()[name = string("op_1848_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(3284800)))];
+            tensor<fp16, [1, 1500, 1280]> linear_96_cast_fp16 = linear(bias = var_1848_to_fp16, weight = var_1847_to_fp16, x = var_1836_cast_fp16)[name = string("linear_96_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1851_to_fp16 = const()[name = string("op_1851_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(3287424)))];
+            tensor<fp16, [1, 1500, 1280]> linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1851_to_fp16, x = var_1836_cast_fp16)[name = string("linear_97_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1855_to_fp16 = const()[name = string("op_1855_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(6564288)))];
+            tensor<fp16, [1280]> var_1856_to_fp16 = const()[name = string("op_1856_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(9841152)))];
+            tensor<fp16, [1, 1500, 1280]> linear_98_cast_fp16 = linear(bias = var_1856_to_fp16, weight = var_1855_to_fp16, x = var_1836_cast_fp16)[name = string("linear_98_cast_fp16")];
+            tensor<int32, [4]> var_1864 = const()[name = string("op_1864"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1865_cast_fp16 = reshape(shape = var_1864, x = linear_96_cast_fp16)[name = string("op_1865_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_67_cast_fp16 = mul(x = var_1865_cast_fp16, y = const_256_to_fp16)[name = string("q_67_cast_fp16")];
+            tensor<int32, [4]> var_1871 = const()[name = string("op_1871"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1872_cast_fp16 = reshape(shape = var_1871, x = linear_97_cast_fp16)[name = string("op_1872_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_257_to_fp16 = const()[name = string("const_257_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_67_cast_fp16 = mul(x = var_1872_cast_fp16, y = const_257_to_fp16)[name = string("k_67_cast_fp16")];
+            tensor<int32, [4]> var_1878 = const()[name = string("op_1878"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1879_cast_fp16 = reshape(shape = var_1878, x = linear_98_cast_fp16)[name = string("op_1879_cast_fp16")];
+            tensor<int32, [4]> var_1880 = const()[name = string("op_1880"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_33_transpose_x_0 = const()[name = string("qk_33_transpose_x_0"), val = bool(false)];
+            bool qk_33_transpose_y_0 = const()[name = string("qk_33_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_65 = transpose(perm = transpose_65_perm_0, x = k_67_cast_fp16)[name = string("transpose_158")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_64 = transpose(perm = transpose_64_perm_0, x = q_67_cast_fp16)[name = string("transpose_159")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_33_cast_fp16 = matmul(transpose_x = qk_33_transpose_x_0, transpose_y = qk_33_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_33_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1884_cast_fp16 = softmax(axis = var_1820, x = qk_33_cast_fp16)[name = string("op_1884_cast_fp16")];
+            bool var_1886_transpose_x_0 = const()[name = string("op_1886_transpose_x_0"), val = bool(false)];
+            bool var_1886_transpose_y_0 = const()[name = string("op_1886_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_67_cast_fp16 = transpose(perm = var_1880, x = var_1879_cast_fp16)[name = string("transpose_157")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1886_cast_fp16 = matmul(transpose_x = var_1886_transpose_x_0, transpose_y = var_1886_transpose_y_0, x = var_1884_cast_fp16, y = v_67_cast_fp16)[name = string("op_1886_cast_fp16")];
+            tensor<int32, [4]> var_1887 = const()[name = string("op_1887"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_16 = const()[name = string("concat_16"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1888_cast_fp16 = transpose(perm = var_1887, x = var_1886_cast_fp16)[name = string("transpose_156")];
+            tensor<fp16, [1, 1500, 1280]> x_203_cast_fp16 = reshape(shape = concat_16, x = var_1888_cast_fp16)[name = string("x_203_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1892_to_fp16 = const()[name = string("op_1892_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(9843776)))];
+            tensor<fp16, [1280]> var_1893_to_fp16 = const()[name = string("op_1893_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13120640)))];
+            tensor<fp16, [1, 1500, 1280]> linear_99_cast_fp16 = linear(bias = var_1893_to_fp16, weight = var_1892_to_fp16, x = x_203_cast_fp16)[name = string("linear_99_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_205_cast_fp16 = add(x = x_199_cast_fp16, y = linear_99_cast_fp16)[name = string("x_205_cast_fp16")];
+            tensor<int32, [1]> var_1900_axes_0 = const()[name = string("op_1900_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13123264)))];
+            tensor<fp16, [1280]> blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13125888)))];
+            tensor<fp16, [1, 1500, 1280]> var_1900_cast_fp16 = layer_norm(axes = var_1900_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_1826_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_205_cast_fp16)[name = string("op_1900_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1909_to_fp16 = const()[name = string("op_1909_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13128512)))];
+            tensor<fp16, [5120]> var_1910_to_fp16 = const()[name = string("op_1910_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(26235776)))];
+            tensor<fp16, [1, 1500, 5120]> linear_100_cast_fp16 = linear(bias = var_1910_to_fp16, weight = var_1909_to_fp16, x = var_1900_cast_fp16)[name = string("linear_100_cast_fp16")];
+            string x_209_mode_0 = const()[name = string("x_209_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_209_cast_fp16 = gelu(mode = x_209_mode_0, x = linear_100_cast_fp16)[name = string("x_209_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(26246080)))];
+            tensor<fp16, [1280]> var_1916_to_fp16 = const()[name = string("op_1916_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39353344)))];
+            tensor<fp16, [1, 1500, 1280]> linear_101_cast_fp16 = linear(bias = var_1916_to_fp16, weight = var_1915_to_fp16, x = x_209_cast_fp16)[name = string("linear_101_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_211_cast_fp16 = add(x = x_205_cast_fp16, y = linear_101_cast_fp16)[name = string("x_211_cast_fp16")];
+            int32 var_1926 = const()[name = string("op_1926"), val = int32(-1)];
+            tensor<int32, [1]> var_1942_axes_0 = const()[name = string("op_1942_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39355968)))];
+            tensor<fp16, [1280]> blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39358592)))];
+            fp16 var_1932_to_fp16 = const()[name = string("op_1932_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1942_cast_fp16 = layer_norm(axes = var_1942_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_1932_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_211_cast_fp16)[name = string("op_1942_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39361216)))];
+            tensor<fp16, [1280]> var_1954_to_fp16 = const()[name = string("op_1954_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(42638080)))];
+            tensor<fp16, [1, 1500, 1280]> linear_102_cast_fp16 = linear(bias = var_1954_to_fp16, weight = var_1953_to_fp16, x = var_1942_cast_fp16)[name = string("linear_102_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(42640704)))];
+            tensor<fp16, [1, 1500, 1280]> linear_103_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1957_to_fp16, x = var_1942_cast_fp16)[name = string("linear_103_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1961_to_fp16 = const()[name = string("op_1961_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(45917568)))];
+            tensor<fp16, [1280]> var_1962_to_fp16 = const()[name = string("op_1962_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(49194432)))];
+            tensor<fp16, [1, 1500, 1280]> linear_104_cast_fp16 = linear(bias = var_1962_to_fp16, weight = var_1961_to_fp16, x = var_1942_cast_fp16)[name = string("linear_104_cast_fp16")];
+            tensor<int32, [4]> var_1970 = const()[name = string("op_1970"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1971_cast_fp16 = reshape(shape = var_1970, x = linear_102_cast_fp16)[name = string("op_1971_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_71_cast_fp16 = mul(x = var_1971_cast_fp16, y = const_258_to_fp16)[name = string("q_71_cast_fp16")];
+            tensor<int32, [4]> var_1977 = const()[name = string("op_1977"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1978_cast_fp16 = reshape(shape = var_1977, x = linear_103_cast_fp16)[name = string("op_1978_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_71_cast_fp16 = mul(x = var_1978_cast_fp16, y = const_259_to_fp16)[name = string("k_71_cast_fp16")];
+            tensor<int32, [4]> var_1984 = const()[name = string("op_1984"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1985_cast_fp16 = reshape(shape = var_1984, x = linear_104_cast_fp16)[name = string("op_1985_cast_fp16")];
+            tensor<int32, [4]> var_1986 = const()[name = string("op_1986"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)];
+            bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_67 = transpose(perm = transpose_67_perm_0, x = k_71_cast_fp16)[name = string("transpose_154")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_66 = transpose(perm = transpose_66_perm_0, x = q_71_cast_fp16)[name = string("transpose_155")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_35_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1990_cast_fp16 = softmax(axis = var_1926, x = qk_35_cast_fp16)[name = string("op_1990_cast_fp16")];
+            bool var_1992_transpose_x_0 = const()[name = string("op_1992_transpose_x_0"), val = bool(false)];
+            bool var_1992_transpose_y_0 = const()[name = string("op_1992_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_71_cast_fp16 = transpose(perm = var_1986, x = var_1985_cast_fp16)[name = string("transpose_153")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1992_cast_fp16 = matmul(transpose_x = var_1992_transpose_x_0, transpose_y = var_1992_transpose_y_0, x = var_1990_cast_fp16, y = v_71_cast_fp16)[name = string("op_1992_cast_fp16")];
+            tensor<int32, [4]> var_1993 = const()[name = string("op_1993"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1994_cast_fp16 = transpose(perm = var_1993, x = var_1992_cast_fp16)[name = string("transpose_152")];
+            tensor<fp16, [1, 1500, 1280]> x_215_cast_fp16 = reshape(shape = concat_17, x = var_1994_cast_fp16)[name = string("x_215_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1998_to_fp16 = const()[name = string("op_1998_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(49197056)))];
+            tensor<fp16, [1280]> var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52473920)))];
+            tensor<fp16, [1, 1500, 1280]> linear_105_cast_fp16 = linear(bias = var_1999_to_fp16, weight = var_1998_to_fp16, x = x_215_cast_fp16)[name = string("linear_105_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_217_cast_fp16 = add(x = x_211_cast_fp16, y = linear_105_cast_fp16)[name = string("x_217_cast_fp16")];
+            tensor<int32, [1]> var_2006_axes_0 = const()[name = string("op_2006_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52476544)))];
+            tensor<fp16, [1280]> blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52479168)))];
+            tensor<fp16, [1, 1500, 1280]> var_2006_cast_fp16 = layer_norm(axes = var_2006_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_1932_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_217_cast_fp16)[name = string("op_2006_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2015_to_fp16 = const()[name = string("op_2015_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52481792)))];
+            tensor<fp16, [5120]> var_2016_to_fp16 = const()[name = string("op_2016_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(65589056)))];
+            tensor<fp16, [1, 1500, 5120]> linear_106_cast_fp16 = linear(bias = var_2016_to_fp16, weight = var_2015_to_fp16, x = var_2006_cast_fp16)[name = string("linear_106_cast_fp16")];
+            string x_221_mode_0 = const()[name = string("x_221_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_221_cast_fp16 = gelu(mode = x_221_mode_0, x = linear_106_cast_fp16)[name = string("x_221_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2021_to_fp16 = const()[name = string("op_2021_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(65599360)))];
+            tensor<fp16, [1280]> var_2022_to_fp16 = const()[name = string("op_2022_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78706624)))];
+            tensor<fp16, [1, 1500, 1280]> linear_107_cast_fp16 = linear(bias = var_2022_to_fp16, weight = var_2021_to_fp16, x = x_221_cast_fp16)[name = string("linear_107_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_223_cast_fp16 = add(x = x_217_cast_fp16, y = linear_107_cast_fp16)[name = string("x_223_cast_fp16")];
+            int32 var_2032 = const()[name = string("op_2032"), val = int32(-1)];
+            tensor<int32, [1]> var_2048_axes_0 = const()[name = string("op_2048_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78709248)))];
+            tensor<fp16, [1280]> blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78711872)))];
+            fp16 var_2038_to_fp16 = const()[name = string("op_2038_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2048_cast_fp16 = layer_norm(axes = var_2048_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_2038_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_223_cast_fp16)[name = string("op_2048_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2059_to_fp16 = const()[name = string("op_2059_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78714496)))];
+            tensor<fp16, [1280]> var_2060_to_fp16 = const()[name = string("op_2060_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(81991360)))];
+            tensor<fp16, [1, 1500, 1280]> linear_108_cast_fp16 = linear(bias = var_2060_to_fp16, weight = var_2059_to_fp16, x = var_2048_cast_fp16)[name = string("linear_108_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2063_to_fp16 = const()[name = string("op_2063_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(81993984)))];
+            tensor<fp16, [1, 1500, 1280]> linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2063_to_fp16, x = var_2048_cast_fp16)[name = string("linear_109_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2067_to_fp16 = const()[name = string("op_2067_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(85270848)))];
+            tensor<fp16, [1280]> var_2068_to_fp16 = const()[name = string("op_2068_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(88547712)))];
+            tensor<fp16, [1, 1500, 1280]> linear_110_cast_fp16 = linear(bias = var_2068_to_fp16, weight = var_2067_to_fp16, x = var_2048_cast_fp16)[name = string("linear_110_cast_fp16")];
+            tensor<int32, [4]> var_2076 = const()[name = string("op_2076"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2077_cast_fp16 = reshape(shape = var_2076, x = linear_108_cast_fp16)[name = string("op_2077_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_75_cast_fp16 = mul(x = var_2077_cast_fp16, y = const_260_to_fp16)[name = string("q_75_cast_fp16")];
+            tensor<int32, [4]> var_2083 = const()[name = string("op_2083"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2084_cast_fp16 = reshape(shape = var_2083, x = linear_109_cast_fp16)[name = string("op_2084_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_75_cast_fp16 = mul(x = var_2084_cast_fp16, y = const_261_to_fp16)[name = string("k_75_cast_fp16")];
+            tensor<int32, [4]> var_2090 = const()[name = string("op_2090"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2091_cast_fp16 = reshape(shape = var_2090, x = linear_110_cast_fp16)[name = string("op_2091_cast_fp16")];
+            tensor<int32, [4]> var_2092 = const()[name = string("op_2092"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)];
+            bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_69 = transpose(perm = transpose_69_perm_0, x = k_75_cast_fp16)[name = string("transpose_150")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_68 = transpose(perm = transpose_68_perm_0, x = q_75_cast_fp16)[name = string("transpose_151")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_37_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2096_cast_fp16 = softmax(axis = var_2032, x = qk_37_cast_fp16)[name = string("op_2096_cast_fp16")];
+            bool var_2098_transpose_x_0 = const()[name = string("op_2098_transpose_x_0"), val = bool(false)];
+            bool var_2098_transpose_y_0 = const()[name = string("op_2098_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_75_cast_fp16 = transpose(perm = var_2092, x = var_2091_cast_fp16)[name = string("transpose_149")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2098_cast_fp16 = matmul(transpose_x = var_2098_transpose_x_0, transpose_y = var_2098_transpose_y_0, x = var_2096_cast_fp16, y = v_75_cast_fp16)[name = string("op_2098_cast_fp16")];
+            tensor<int32, [4]> var_2099 = const()[name = string("op_2099"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2100_cast_fp16 = transpose(perm = var_2099, x = var_2098_cast_fp16)[name = string("transpose_148")];
+            tensor<fp16, [1, 1500, 1280]> x_227_cast_fp16 = reshape(shape = concat_18, x = var_2100_cast_fp16)[name = string("x_227_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2104_to_fp16 = const()[name = string("op_2104_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(88550336)))];
+            tensor<fp16, [1280]> var_2105_to_fp16 = const()[name = string("op_2105_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91827200)))];
+            tensor<fp16, [1, 1500, 1280]> linear_111_cast_fp16 = linear(bias = var_2105_to_fp16, weight = var_2104_to_fp16, x = x_227_cast_fp16)[name = string("linear_111_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_229_cast_fp16 = add(x = x_223_cast_fp16, y = linear_111_cast_fp16)[name = string("x_229_cast_fp16")];
+            tensor<int32, [1]> var_2112_axes_0 = const()[name = string("op_2112_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91829824)))];
+            tensor<fp16, [1280]> blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91832448)))];
+            tensor<fp16, [1, 1500, 1280]> var_2112_cast_fp16 = layer_norm(axes = var_2112_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_2038_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_229_cast_fp16)[name = string("op_2112_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2121_to_fp16 = const()[name = string("op_2121_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91835072)))];
+            tensor<fp16, [5120]> var_2122_to_fp16 = const()[name = string("op_2122_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(104942336)))];
+            tensor<fp16, [1, 1500, 5120]> linear_112_cast_fp16 = linear(bias = var_2122_to_fp16, weight = var_2121_to_fp16, x = var_2112_cast_fp16)[name = string("linear_112_cast_fp16")];
+            string x_233_mode_0 = const()[name = string("x_233_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_233_cast_fp16 = gelu(mode = x_233_mode_0, x = linear_112_cast_fp16)[name = string("x_233_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2127_to_fp16 = const()[name = string("op_2127_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(104952640)))];
+            tensor<fp16, [1280]> var_2128_to_fp16 = const()[name = string("op_2128_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118059904)))];
+            tensor<fp16, [1, 1500, 1280]> linear_113_cast_fp16 = linear(bias = var_2128_to_fp16, weight = var_2127_to_fp16, x = x_233_cast_fp16)[name = string("linear_113_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_235_cast_fp16 = add(x = x_229_cast_fp16, y = linear_113_cast_fp16)[name = string("x_235_cast_fp16")];
+            int32 var_2138 = const()[name = string("op_2138"), val = int32(-1)];
+            tensor<int32, [1]> var_2154_axes_0 = const()[name = string("op_2154_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118062528)))];
+            tensor<fp16, [1280]> blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118065152)))];
+            fp16 var_2144_to_fp16 = const()[name = string("op_2144_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2154_cast_fp16 = layer_norm(axes = var_2154_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_2144_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_235_cast_fp16)[name = string("op_2154_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2165_to_fp16 = const()[name = string("op_2165_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118067776)))];
+            tensor<fp16, [1280]> var_2166_to_fp16 = const()[name = string("op_2166_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(121344640)))];
+            tensor<fp16, [1, 1500, 1280]> linear_114_cast_fp16 = linear(bias = var_2166_to_fp16, weight = var_2165_to_fp16, x = var_2154_cast_fp16)[name = string("linear_114_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2169_to_fp16 = const()[name = string("op_2169_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(121347264)))];
+            tensor<fp16, [1, 1500, 1280]> linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2169_to_fp16, x = var_2154_cast_fp16)[name = string("linear_115_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2173_to_fp16 = const()[name = string("op_2173_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(124624128)))];
+            tensor<fp16, [1280]> var_2174_to_fp16 = const()[name = string("op_2174_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(127900992)))];
+            tensor<fp16, [1, 1500, 1280]> linear_116_cast_fp16 = linear(bias = var_2174_to_fp16, weight = var_2173_to_fp16, x = var_2154_cast_fp16)[name = string("linear_116_cast_fp16")];
+            tensor<int32, [4]> var_2182 = const()[name = string("op_2182"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2183_cast_fp16 = reshape(shape = var_2182, x = linear_114_cast_fp16)[name = string("op_2183_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_79_cast_fp16 = mul(x = var_2183_cast_fp16, y = const_262_to_fp16)[name = string("q_79_cast_fp16")];
+            tensor<int32, [4]> var_2189 = const()[name = string("op_2189"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2190_cast_fp16 = reshape(shape = var_2189, x = linear_115_cast_fp16)[name = string("op_2190_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_79_cast_fp16 = mul(x = var_2190_cast_fp16, y = const_263_to_fp16)[name = string("k_79_cast_fp16")];
+            tensor<int32, [4]> var_2196 = const()[name = string("op_2196"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2197_cast_fp16 = reshape(shape = var_2196, x = linear_116_cast_fp16)[name = string("op_2197_cast_fp16")];
+            tensor<int32, [4]> var_2198 = const()[name = string("op_2198"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_39_transpose_x_0 = const()[name = string("qk_39_transpose_x_0"), val = bool(false)];
+            bool qk_39_transpose_y_0 = const()[name = string("qk_39_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_71 = transpose(perm = transpose_71_perm_0, x = k_79_cast_fp16)[name = string("transpose_146")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_70 = transpose(perm = transpose_70_perm_0, x = q_79_cast_fp16)[name = string("transpose_147")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_39_cast_fp16 = matmul(transpose_x = qk_39_transpose_x_0, transpose_y = qk_39_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_39_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2202_cast_fp16 = softmax(axis = var_2138, x = qk_39_cast_fp16)[name = string("op_2202_cast_fp16")];
+            bool var_2204_transpose_x_0 = const()[name = string("op_2204_transpose_x_0"), val = bool(false)];
+            bool var_2204_transpose_y_0 = const()[name = string("op_2204_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_79_cast_fp16 = transpose(perm = var_2198, x = var_2197_cast_fp16)[name = string("transpose_145")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2204_cast_fp16 = matmul(transpose_x = var_2204_transpose_x_0, transpose_y = var_2204_transpose_y_0, x = var_2202_cast_fp16, y = v_79_cast_fp16)[name = string("op_2204_cast_fp16")];
+            tensor<int32, [4]> var_2205 = const()[name = string("op_2205"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2206_cast_fp16 = transpose(perm = var_2205, x = var_2204_cast_fp16)[name = string("transpose_144")];
+            tensor<fp16, [1, 1500, 1280]> x_239_cast_fp16 = reshape(shape = concat_19, x = var_2206_cast_fp16)[name = string("x_239_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2210_to_fp16 = const()[name = string("op_2210_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(127903616)))];
+            tensor<fp16, [1280]> var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131180480)))];
+            tensor<fp16, [1, 1500, 1280]> linear_117_cast_fp16 = linear(bias = var_2211_to_fp16, weight = var_2210_to_fp16, x = x_239_cast_fp16)[name = string("linear_117_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_241_cast_fp16 = add(x = x_235_cast_fp16, y = linear_117_cast_fp16)[name = string("x_241_cast_fp16")];
+            tensor<int32, [1]> var_2218_axes_0 = const()[name = string("op_2218_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131183104)))];
+            tensor<fp16, [1280]> blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131185728)))];
+            tensor<fp16, [1, 1500, 1280]> var_2218_cast_fp16 = layer_norm(axes = var_2218_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_2144_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_241_cast_fp16)[name = string("op_2218_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2227_to_fp16 = const()[name = string("op_2227_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131188352)))];
+            tensor<fp16, [5120]> var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(144295616)))];
+            tensor<fp16, [1, 1500, 5120]> linear_118_cast_fp16 = linear(bias = var_2228_to_fp16, weight = var_2227_to_fp16, x = var_2218_cast_fp16)[name = string("linear_118_cast_fp16")];
+            string x_245_mode_0 = const()[name = string("x_245_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_245_cast_fp16 = gelu(mode = x_245_mode_0, x = linear_118_cast_fp16)[name = string("x_245_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2233_to_fp16 = const()[name = string("op_2233_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(144305920)))];
+            tensor<fp16, [1280]> var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157413184)))];
+            tensor<fp16, [1, 1500, 1280]> linear_119_cast_fp16 = linear(bias = var_2234_to_fp16, weight = var_2233_to_fp16, x = x_245_cast_fp16)[name = string("linear_119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_247_cast_fp16 = add(x = x_241_cast_fp16, y = linear_119_cast_fp16)[name = string("x_247_cast_fp16")];
+            int32 var_2244 = const()[name = string("op_2244"), val = int32(-1)];
+            tensor<int32, [1]> var_2260_axes_0 = const()[name = string("op_2260_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157415808)))];
+            tensor<fp16, [1280]> blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157418432)))];
+            fp16 var_2250_to_fp16 = const()[name = string("op_2250_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2260_cast_fp16 = layer_norm(axes = var_2260_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_2250_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_247_cast_fp16)[name = string("op_2260_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2271_to_fp16 = const()[name = string("op_2271_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157421056)))];
+            tensor<fp16, [1280]> var_2272_to_fp16 = const()[name = string("op_2272_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(160697920)))];
+            tensor<fp16, [1, 1500, 1280]> linear_120_cast_fp16 = linear(bias = var_2272_to_fp16, weight = var_2271_to_fp16, x = var_2260_cast_fp16)[name = string("linear_120_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2275_to_fp16 = const()[name = string("op_2275_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(160700544)))];
+            tensor<fp16, [1, 1500, 1280]> linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2275_to_fp16, x = var_2260_cast_fp16)[name = string("linear_121_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2279_to_fp16 = const()[name = string("op_2279_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(163977408)))];
+            tensor<fp16, [1280]> var_2280_to_fp16 = const()[name = string("op_2280_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(167254272)))];
+            tensor<fp16, [1, 1500, 1280]> linear_122_cast_fp16 = linear(bias = var_2280_to_fp16, weight = var_2279_to_fp16, x = var_2260_cast_fp16)[name = string("linear_122_cast_fp16")];
+            tensor<int32, [4]> var_2288 = const()[name = string("op_2288"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2289_cast_fp16 = reshape(shape = var_2288, x = linear_120_cast_fp16)[name = string("op_2289_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_264_to_fp16 = const()[name = string("const_264_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_83_cast_fp16 = mul(x = var_2289_cast_fp16, y = const_264_to_fp16)[name = string("q_83_cast_fp16")];
+            tensor<int32, [4]> var_2295 = const()[name = string("op_2295"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2296_cast_fp16 = reshape(shape = var_2295, x = linear_121_cast_fp16)[name = string("op_2296_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_83_cast_fp16 = mul(x = var_2296_cast_fp16, y = const_265_to_fp16)[name = string("k_83_cast_fp16")];
+            tensor<int32, [4]> var_2302 = const()[name = string("op_2302"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2303_cast_fp16 = reshape(shape = var_2302, x = linear_122_cast_fp16)[name = string("op_2303_cast_fp16")];
+            tensor<int32, [4]> var_2304 = const()[name = string("op_2304"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)];
+            bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_73 = transpose(perm = transpose_73_perm_0, x = k_83_cast_fp16)[name = string("transpose_142")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = q_83_cast_fp16)[name = string("transpose_143")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_72, y = transpose_73)[name = string("qk_41_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2308_cast_fp16 = softmax(axis = var_2244, x = qk_41_cast_fp16)[name = string("op_2308_cast_fp16")];
+            bool var_2310_transpose_x_0 = const()[name = string("op_2310_transpose_x_0"), val = bool(false)];
+            bool var_2310_transpose_y_0 = const()[name = string("op_2310_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_83_cast_fp16 = transpose(perm = var_2304, x = var_2303_cast_fp16)[name = string("transpose_141")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2310_cast_fp16 = matmul(transpose_x = var_2310_transpose_x_0, transpose_y = var_2310_transpose_y_0, x = var_2308_cast_fp16, y = v_83_cast_fp16)[name = string("op_2310_cast_fp16")];
+            tensor<int32, [4]> var_2311 = const()[name = string("op_2311"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2312_cast_fp16 = transpose(perm = var_2311, x = var_2310_cast_fp16)[name = string("transpose_140")];
+            tensor<fp16, [1, 1500, 1280]> x_251_cast_fp16 = reshape(shape = concat_20, x = var_2312_cast_fp16)[name = string("x_251_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2316_to_fp16 = const()[name = string("op_2316_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(167256896)))];
+            tensor<fp16, [1280]> var_2317_to_fp16 = const()[name = string("op_2317_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170533760)))];
+            tensor<fp16, [1, 1500, 1280]> linear_123_cast_fp16 = linear(bias = var_2317_to_fp16, weight = var_2316_to_fp16, x = x_251_cast_fp16)[name = string("linear_123_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_253_cast_fp16 = add(x = x_247_cast_fp16, y = linear_123_cast_fp16)[name = string("x_253_cast_fp16")];
+            tensor<int32, [1]> var_2324_axes_0 = const()[name = string("op_2324_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170536384)))];
+            tensor<fp16, [1280]> blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170539008)))];
+            tensor<fp16, [1, 1500, 1280]> var_2324_cast_fp16 = layer_norm(axes = var_2324_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_2250_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_253_cast_fp16)[name = string("op_2324_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2333_to_fp16 = const()[name = string("op_2333_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170541632)))];
+            tensor<fp16, [5120]> var_2334_to_fp16 = const()[name = string("op_2334_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(183648896)))];
+            tensor<fp16, [1, 1500, 5120]> linear_124_cast_fp16 = linear(bias = var_2334_to_fp16, weight = var_2333_to_fp16, x = var_2324_cast_fp16)[name = string("linear_124_cast_fp16")];
+            string x_257_mode_0 = const()[name = string("x_257_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_257_cast_fp16 = gelu(mode = x_257_mode_0, x = linear_124_cast_fp16)[name = string("x_257_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2339_to_fp16 = const()[name = string("op_2339_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(183659200)))];
+            tensor<fp16, [1280]> var_2340_to_fp16 = const()[name = string("op_2340_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196766464)))];
+            tensor<fp16, [1, 1500, 1280]> linear_125_cast_fp16 = linear(bias = var_2340_to_fp16, weight = var_2339_to_fp16, x = x_257_cast_fp16)[name = string("linear_125_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_259_cast_fp16 = add(x = x_253_cast_fp16, y = linear_125_cast_fp16)[name = string("x_259_cast_fp16")];
+            int32 var_2350 = const()[name = string("op_2350"), val = int32(-1)];
+            tensor<int32, [1]> var_2366_axes_0 = const()[name = string("op_2366_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196769088)))];
+            tensor<fp16, [1280]> blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196771712)))];
+            fp16 var_2356_to_fp16 = const()[name = string("op_2356_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2366_cast_fp16 = layer_norm(axes = var_2366_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_2356_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_259_cast_fp16)[name = string("op_2366_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2377_to_fp16 = const()[name = string("op_2377_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196774336)))];
+            tensor<fp16, [1280]> var_2378_to_fp16 = const()[name = string("op_2378_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(200051200)))];
+            tensor<fp16, [1, 1500, 1280]> linear_126_cast_fp16 = linear(bias = var_2378_to_fp16, weight = var_2377_to_fp16, x = var_2366_cast_fp16)[name = string("linear_126_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2381_to_fp16 = const()[name = string("op_2381_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(200053824)))];
+            tensor<fp16, [1, 1500, 1280]> linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2381_to_fp16, x = var_2366_cast_fp16)[name = string("linear_127_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2385_to_fp16 = const()[name = string("op_2385_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(203330688)))];
+            tensor<fp16, [1280]> var_2386_to_fp16 = const()[name = string("op_2386_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(206607552)))];
+            tensor<fp16, [1, 1500, 1280]> linear_128_cast_fp16 = linear(bias = var_2386_to_fp16, weight = var_2385_to_fp16, x = var_2366_cast_fp16)[name = string("linear_128_cast_fp16")];
+            tensor<int32, [4]> var_2394 = const()[name = string("op_2394"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2395_cast_fp16 = reshape(shape = var_2394, x = linear_126_cast_fp16)[name = string("op_2395_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_266_to_fp16 = const()[name = string("const_266_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_87_cast_fp16 = mul(x = var_2395_cast_fp16, y = const_266_to_fp16)[name = string("q_87_cast_fp16")];
+            tensor<int32, [4]> var_2401 = const()[name = string("op_2401"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2402_cast_fp16 = reshape(shape = var_2401, x = linear_127_cast_fp16)[name = string("op_2402_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_87_cast_fp16 = mul(x = var_2402_cast_fp16, y = const_267_to_fp16)[name = string("k_87_cast_fp16")];
+            tensor<int32, [4]> var_2408 = const()[name = string("op_2408"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2409_cast_fp16 = reshape(shape = var_2408, x = linear_128_cast_fp16)[name = string("op_2409_cast_fp16")];
+            tensor<int32, [4]> var_2410 = const()[name = string("op_2410"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)];
+            bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_75 = transpose(perm = transpose_75_perm_0, x = k_87_cast_fp16)[name = string("transpose_138")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = q_87_cast_fp16)[name = string("transpose_139")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_74, y = transpose_75)[name = string("qk_43_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2414_cast_fp16 = softmax(axis = var_2350, x = qk_43_cast_fp16)[name = string("op_2414_cast_fp16")];
+            bool var_2416_transpose_x_0 = const()[name = string("op_2416_transpose_x_0"), val = bool(false)];
+            bool var_2416_transpose_y_0 = const()[name = string("op_2416_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_87_cast_fp16 = transpose(perm = var_2410, x = var_2409_cast_fp16)[name = string("transpose_137")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2416_cast_fp16 = matmul(transpose_x = var_2416_transpose_x_0, transpose_y = var_2416_transpose_y_0, x = var_2414_cast_fp16, y = v_87_cast_fp16)[name = string("op_2416_cast_fp16")];
+            tensor<int32, [4]> var_2417 = const()[name = string("op_2417"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2418_cast_fp16 = transpose(perm = var_2417, x = var_2416_cast_fp16)[name = string("transpose_136")];
+            tensor<fp16, [1, 1500, 1280]> x_263_cast_fp16 = reshape(shape = concat_21, x = var_2418_cast_fp16)[name = string("x_263_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2422_to_fp16 = const()[name = string("op_2422_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(206610176)))];
+            tensor<fp16, [1280]> var_2423_to_fp16 = const()[name = string("op_2423_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209887040)))];
+            tensor<fp16, [1, 1500, 1280]> linear_129_cast_fp16 = linear(bias = var_2423_to_fp16, weight = var_2422_to_fp16, x = x_263_cast_fp16)[name = string("linear_129_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_265_cast_fp16 = add(x = x_259_cast_fp16, y = linear_129_cast_fp16)[name = string("x_265_cast_fp16")];
+            tensor<int32, [1]> var_2430_axes_0 = const()[name = string("op_2430_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209889664)))];
+            tensor<fp16, [1280]> blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209892288)))];
+            tensor<fp16, [1, 1500, 1280]> var_2430_cast_fp16 = layer_norm(axes = var_2430_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_2356_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_265_cast_fp16)[name = string("op_2430_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2439_to_fp16 = const()[name = string("op_2439_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209894912)))];
+            tensor<fp16, [5120]> var_2440_to_fp16 = const()[name = string("op_2440_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(223002176)))];
+            tensor<fp16, [1, 1500, 5120]> linear_130_cast_fp16 = linear(bias = var_2440_to_fp16, weight = var_2439_to_fp16, x = var_2430_cast_fp16)[name = string("linear_130_cast_fp16")];
+            string x_269_mode_0 = const()[name = string("x_269_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_269_cast_fp16 = gelu(mode = x_269_mode_0, x = linear_130_cast_fp16)[name = string("x_269_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2445_to_fp16 = const()[name = string("op_2445_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(223012480)))];
+            tensor<fp16, [1280]> var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236119744)))];
+            tensor<fp16, [1, 1500, 1280]> linear_131_cast_fp16 = linear(bias = var_2446_to_fp16, weight = var_2445_to_fp16, x = x_269_cast_fp16)[name = string("linear_131_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_271_cast_fp16 = add(x = x_265_cast_fp16, y = linear_131_cast_fp16)[name = string("x_271_cast_fp16")];
+            int32 var_2456 = const()[name = string("op_2456"), val = int32(-1)];
+            tensor<int32, [1]> var_2472_axes_0 = const()[name = string("op_2472_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236122368)))];
+            tensor<fp16, [1280]> blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236124992)))];
+            fp16 var_2462_to_fp16 = const()[name = string("op_2462_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2472_cast_fp16 = layer_norm(axes = var_2472_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_2462_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_271_cast_fp16)[name = string("op_2472_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2483_to_fp16 = const()[name = string("op_2483_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236127616)))];
+            tensor<fp16, [1280]> var_2484_to_fp16 = const()[name = string("op_2484_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(239404480)))];
+            tensor<fp16, [1, 1500, 1280]> linear_132_cast_fp16 = linear(bias = var_2484_to_fp16, weight = var_2483_to_fp16, x = var_2472_cast_fp16)[name = string("linear_132_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2487_to_fp16 = const()[name = string("op_2487_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(239407104)))];
+            tensor<fp16, [1, 1500, 1280]> linear_133_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2487_to_fp16, x = var_2472_cast_fp16)[name = string("linear_133_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2491_to_fp16 = const()[name = string("op_2491_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(242683968)))];
+            tensor<fp16, [1280]> var_2492_to_fp16 = const()[name = string("op_2492_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(245960832)))];
+            tensor<fp16, [1, 1500, 1280]> linear_134_cast_fp16 = linear(bias = var_2492_to_fp16, weight = var_2491_to_fp16, x = var_2472_cast_fp16)[name = string("linear_134_cast_fp16")];
+            tensor<int32, [4]> var_2500 = const()[name = string("op_2500"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2501_cast_fp16 = reshape(shape = var_2500, x = linear_132_cast_fp16)[name = string("op_2501_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_91_cast_fp16 = mul(x = var_2501_cast_fp16, y = const_268_to_fp16)[name = string("q_91_cast_fp16")];
+            tensor<int32, [4]> var_2507 = const()[name = string("op_2507"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2508_cast_fp16 = reshape(shape = var_2507, x = linear_133_cast_fp16)[name = string("op_2508_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_91_cast_fp16 = mul(x = var_2508_cast_fp16, y = const_269_to_fp16)[name = string("k_91_cast_fp16")];
+            tensor<int32, [4]> var_2514 = const()[name = string("op_2514"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2515_cast_fp16 = reshape(shape = var_2514, x = linear_134_cast_fp16)[name = string("op_2515_cast_fp16")];
+            tensor<int32, [4]> var_2516 = const()[name = string("op_2516"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_45_transpose_x_0 = const()[name = string("qk_45_transpose_x_0"), val = bool(false)];
+            bool qk_45_transpose_y_0 = const()[name = string("qk_45_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_77 = transpose(perm = transpose_77_perm_0, x = k_91_cast_fp16)[name = string("transpose_134")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = q_91_cast_fp16)[name = string("transpose_135")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_45_cast_fp16 = matmul(transpose_x = qk_45_transpose_x_0, transpose_y = qk_45_transpose_y_0, x = transpose_76, y = transpose_77)[name = string("qk_45_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2520_cast_fp16 = softmax(axis = var_2456, x = qk_45_cast_fp16)[name = string("op_2520_cast_fp16")];
+            bool var_2522_transpose_x_0 = const()[name = string("op_2522_transpose_x_0"), val = bool(false)];
+            bool var_2522_transpose_y_0 = const()[name = string("op_2522_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_91_cast_fp16 = transpose(perm = var_2516, x = var_2515_cast_fp16)[name = string("transpose_133")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2522_cast_fp16 = matmul(transpose_x = var_2522_transpose_x_0, transpose_y = var_2522_transpose_y_0, x = var_2520_cast_fp16, y = v_91_cast_fp16)[name = string("op_2522_cast_fp16")];
+            tensor<int32, [4]> var_2523 = const()[name = string("op_2523"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_22 = const()[name = string("concat_22"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2524_cast_fp16 = transpose(perm = var_2523, x = var_2522_cast_fp16)[name = string("transpose_132")];
+            tensor<fp16, [1, 1500, 1280]> x_275_cast_fp16 = reshape(shape = concat_22, x = var_2524_cast_fp16)[name = string("x_275_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2528_to_fp16 = const()[name = string("op_2528_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(245963456)))];
+            tensor<fp16, [1280]> var_2529_to_fp16 = const()[name = string("op_2529_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249240320)))];
+            tensor<fp16, [1, 1500, 1280]> linear_135_cast_fp16 = linear(bias = var_2529_to_fp16, weight = var_2528_to_fp16, x = x_275_cast_fp16)[name = string("linear_135_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_277_cast_fp16 = add(x = x_271_cast_fp16, y = linear_135_cast_fp16)[name = string("x_277_cast_fp16")];
+            tensor<int32, [1]> var_2536_axes_0 = const()[name = string("op_2536_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249242944)))];
+            tensor<fp16, [1280]> blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249245568)))];
+            tensor<fp16, [1, 1500, 1280]> var_2536_cast_fp16 = layer_norm(axes = var_2536_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_2462_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_277_cast_fp16)[name = string("op_2536_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2545_to_fp16 = const()[name = string("op_2545_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249248192)))];
+            tensor<fp16, [5120]> var_2546_to_fp16 = const()[name = string("op_2546_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(262355456)))];
+            tensor<fp16, [1, 1500, 5120]> linear_136_cast_fp16 = linear(bias = var_2546_to_fp16, weight = var_2545_to_fp16, x = var_2536_cast_fp16)[name = string("linear_136_cast_fp16")];
+            string x_281_mode_0 = const()[name = string("x_281_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_281_cast_fp16 = gelu(mode = x_281_mode_0, x = linear_136_cast_fp16)[name = string("x_281_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2551_to_fp16 = const()[name = string("op_2551_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(262365760)))];
+            tensor<fp16, [1280]> var_2552_to_fp16 = const()[name = string("op_2552_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275473024)))];
+            tensor<fp16, [1, 1500, 1280]> linear_137_cast_fp16 = linear(bias = var_2552_to_fp16, weight = var_2551_to_fp16, x = x_281_cast_fp16)[name = string("linear_137_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_283_cast_fp16 = add(x = x_277_cast_fp16, y = linear_137_cast_fp16)[name = string("x_283_cast_fp16")];
+            int32 var_2562 = const()[name = string("op_2562"), val = int32(-1)];
+            tensor<int32, [1]> var_2578_axes_0 = const()[name = string("op_2578_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275475648)))];
+            tensor<fp16, [1280]> blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275478272)))];
+            fp16 var_2568_to_fp16 = const()[name = string("op_2568_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2578_cast_fp16 = layer_norm(axes = var_2578_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_2568_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_283_cast_fp16)[name = string("op_2578_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2589_to_fp16 = const()[name = string("op_2589_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275480896)))];
+            tensor<fp16, [1280]> var_2590_to_fp16 = const()[name = string("op_2590_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(278757760)))];
+            tensor<fp16, [1, 1500, 1280]> linear_138_cast_fp16 = linear(bias = var_2590_to_fp16, weight = var_2589_to_fp16, x = var_2578_cast_fp16)[name = string("linear_138_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2593_to_fp16 = const()[name = string("op_2593_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(278760384)))];
+            tensor<fp16, [1, 1500, 1280]> linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2593_to_fp16, x = var_2578_cast_fp16)[name = string("linear_139_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2597_to_fp16 = const()[name = string("op_2597_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(282037248)))];
+            tensor<fp16, [1280]> var_2598_to_fp16 = const()[name = string("op_2598_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(285314112)))];
+            tensor<fp16, [1, 1500, 1280]> linear_140_cast_fp16 = linear(bias = var_2598_to_fp16, weight = var_2597_to_fp16, x = var_2578_cast_fp16)[name = string("linear_140_cast_fp16")];
+            tensor<int32, [4]> var_2606 = const()[name = string("op_2606"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2607_cast_fp16 = reshape(shape = var_2606, x = linear_138_cast_fp16)[name = string("op_2607_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_95_cast_fp16 = mul(x = var_2607_cast_fp16, y = const_270_to_fp16)[name = string("q_95_cast_fp16")];
+            tensor<int32, [4]> var_2613 = const()[name = string("op_2613"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2614_cast_fp16 = reshape(shape = var_2613, x = linear_139_cast_fp16)[name = string("op_2614_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_95_cast_fp16 = mul(x = var_2614_cast_fp16, y = const_271_to_fp16)[name = string("k_95_cast_fp16")];
+            tensor<int32, [4]> var_2620 = const()[name = string("op_2620"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2621_cast_fp16 = reshape(shape = var_2620, x = linear_140_cast_fp16)[name = string("op_2621_cast_fp16")];
+            tensor<int32, [4]> var_2622 = const()[name = string("op_2622"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)];
+            bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_79 = transpose(perm = transpose_79_perm_0, x = k_95_cast_fp16)[name = string("transpose_130")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = q_95_cast_fp16)[name = string("transpose_131")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_78, y = transpose_79)[name = string("qk_47_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2626_cast_fp16 = softmax(axis = var_2562, x = qk_47_cast_fp16)[name = string("op_2626_cast_fp16")];
+            bool var_2628_transpose_x_0 = const()[name = string("op_2628_transpose_x_0"), val = bool(false)];
+            bool var_2628_transpose_y_0 = const()[name = string("op_2628_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_95_cast_fp16 = transpose(perm = var_2622, x = var_2621_cast_fp16)[name = string("transpose_129")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2628_cast_fp16 = matmul(transpose_x = var_2628_transpose_x_0, transpose_y = var_2628_transpose_y_0, x = var_2626_cast_fp16, y = v_95_cast_fp16)[name = string("op_2628_cast_fp16")];
+            tensor<int32, [4]> var_2629 = const()[name = string("op_2629"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2630_cast_fp16 = transpose(perm = var_2629, x = var_2628_cast_fp16)[name = string("transpose_128")];
+            tensor<fp16, [1, 1500, 1280]> x_287_cast_fp16 = reshape(shape = concat_23, x = var_2630_cast_fp16)[name = string("x_287_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2634_to_fp16 = const()[name = string("op_2634_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(285316736)))];
+            tensor<fp16, [1280]> var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288593600)))];
+            tensor<fp16, [1, 1500, 1280]> linear_141_cast_fp16 = linear(bias = var_2635_to_fp16, weight = var_2634_to_fp16, x = x_287_cast_fp16)[name = string("linear_141_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_289_cast_fp16 = add(x = x_283_cast_fp16, y = linear_141_cast_fp16)[name = string("x_289_cast_fp16")];
+            tensor<int32, [1]> var_2642_axes_0 = const()[name = string("op_2642_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288596224)))];
+            tensor<fp16, [1280]> blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288598848)))];
+            tensor<fp16, [1, 1500, 1280]> var_2642_cast_fp16 = layer_norm(axes = var_2642_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_2568_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_289_cast_fp16)[name = string("op_2642_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2651_to_fp16 = const()[name = string("op_2651_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288601472)))];
+            tensor<fp16, [5120]> var_2652_to_fp16 = const()[name = string("op_2652_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(301708736)))];
+            tensor<fp16, [1, 1500, 5120]> linear_142_cast_fp16 = linear(bias = var_2652_to_fp16, weight = var_2651_to_fp16, x = var_2642_cast_fp16)[name = string("linear_142_cast_fp16")];
+            string x_293_mode_0 = const()[name = string("x_293_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_293_cast_fp16 = gelu(mode = x_293_mode_0, x = linear_142_cast_fp16)[name = string("x_293_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(301719040)))];
+            tensor<fp16, [1280]> var_2658_to_fp16 = const()[name = string("op_2658_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314826304)))];
+            tensor<fp16, [1, 1500, 1280]> linear_143_cast_fp16 = linear(bias = var_2658_to_fp16, weight = var_2657_to_fp16, x = x_293_cast_fp16)[name = string("linear_143_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_295_cast_fp16 = add(x = x_289_cast_fp16, y = linear_143_cast_fp16)[name = string("x_295_cast_fp16")];
+            int32 var_2668 = const()[name = string("op_2668"), val = int32(-1)];
+            tensor<int32, [1]> var_2684_axes_0 = const()[name = string("op_2684_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314828928)))];
+            tensor<fp16, [1280]> blocks_24_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314831552)))];
+            fp16 var_2674_to_fp16 = const()[name = string("op_2674_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2684_cast_fp16 = layer_norm(axes = var_2684_axes_0, beta = blocks_24_attn_ln_bias_to_fp16, epsilon = var_2674_to_fp16, gamma = blocks_24_attn_ln_weight_to_fp16, x = x_295_cast_fp16)[name = string("op_2684_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2695_to_fp16 = const()[name = string("op_2695_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314834176)))];
+            tensor<fp16, [1280]> var_2696_to_fp16 = const()[name = string("op_2696_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(318111040)))];
+            tensor<fp16, [1, 1500, 1280]> linear_144_cast_fp16 = linear(bias = var_2696_to_fp16, weight = var_2695_to_fp16, x = var_2684_cast_fp16)[name = string("linear_144_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2699_to_fp16 = const()[name = string("op_2699_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(318113664)))];
+            tensor<fp16, [1, 1500, 1280]> linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2699_to_fp16, x = var_2684_cast_fp16)[name = string("linear_145_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2703_to_fp16 = const()[name = string("op_2703_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(321390528)))];
+            tensor<fp16, [1280]> var_2704_to_fp16 = const()[name = string("op_2704_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(324667392)))];
+            tensor<fp16, [1, 1500, 1280]> linear_146_cast_fp16 = linear(bias = var_2704_to_fp16, weight = var_2703_to_fp16, x = var_2684_cast_fp16)[name = string("linear_146_cast_fp16")];
+            tensor<int32, [4]> var_2712 = const()[name = string("op_2712"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2713_cast_fp16 = reshape(shape = var_2712, x = linear_144_cast_fp16)[name = string("op_2713_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_99_cast_fp16 = mul(x = var_2713_cast_fp16, y = const_272_to_fp16)[name = string("q_99_cast_fp16")];
+            tensor<int32, [4]> var_2719 = const()[name = string("op_2719"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2720_cast_fp16 = reshape(shape = var_2719, x = linear_145_cast_fp16)[name = string("op_2720_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_99_cast_fp16 = mul(x = var_2720_cast_fp16, y = const_273_to_fp16)[name = string("k_99_cast_fp16")];
+            tensor<int32, [4]> var_2726 = const()[name = string("op_2726"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2727_cast_fp16 = reshape(shape = var_2726, x = linear_146_cast_fp16)[name = string("op_2727_cast_fp16")];
+            tensor<int32, [4]> var_2728 = const()[name = string("op_2728"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)];
+            bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_81 = transpose(perm = transpose_81_perm_0, x = k_99_cast_fp16)[name = string("transpose_126")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = q_99_cast_fp16)[name = string("transpose_127")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_80, y = transpose_81)[name = string("qk_49_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2732_cast_fp16 = softmax(axis = var_2668, x = qk_49_cast_fp16)[name = string("op_2732_cast_fp16")];
+            bool var_2734_transpose_x_0 = const()[name = string("op_2734_transpose_x_0"), val = bool(false)];
+            bool var_2734_transpose_y_0 = const()[name = string("op_2734_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_99_cast_fp16 = transpose(perm = var_2728, x = var_2727_cast_fp16)[name = string("transpose_125")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2734_cast_fp16 = matmul(transpose_x = var_2734_transpose_x_0, transpose_y = var_2734_transpose_y_0, x = var_2732_cast_fp16, y = v_99_cast_fp16)[name = string("op_2734_cast_fp16")];
+            tensor<int32, [4]> var_2735 = const()[name = string("op_2735"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_24 = const()[name = string("concat_24"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2736_cast_fp16 = transpose(perm = var_2735, x = var_2734_cast_fp16)[name = string("transpose_124")];
+            tensor<fp16, [1, 1500, 1280]> x_299_cast_fp16 = reshape(shape = concat_24, x = var_2736_cast_fp16)[name = string("x_299_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2740_to_fp16 = const()[name = string("op_2740_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(324670016)))];
+            tensor<fp16, [1280]> var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327946880)))];
+            tensor<fp16, [1, 1500, 1280]> linear_147_cast_fp16 = linear(bias = var_2741_to_fp16, weight = var_2740_to_fp16, x = x_299_cast_fp16)[name = string("linear_147_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_301_cast_fp16 = add(x = x_295_cast_fp16, y = linear_147_cast_fp16)[name = string("x_301_cast_fp16")];
+            tensor<int32, [1]> var_2748_axes_0 = const()[name = string("op_2748_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_mlp_ln_weight_to_fp16 = const()[name = string("blocks_24_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327949504)))];
+            tensor<fp16, [1280]> blocks_24_mlp_ln_bias_to_fp16 = const()[name = string("blocks_24_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327952128)))];
+            tensor<fp16, [1, 1500, 1280]> var_2748_cast_fp16 = layer_norm(axes = var_2748_axes_0, beta = blocks_24_mlp_ln_bias_to_fp16, epsilon = var_2674_to_fp16, gamma = blocks_24_mlp_ln_weight_to_fp16, x = x_301_cast_fp16)[name = string("op_2748_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2757_to_fp16 = const()[name = string("op_2757_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327954752)))];
+            tensor<fp16, [5120]> var_2758_to_fp16 = const()[name = string("op_2758_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(341062016)))];
+            tensor<fp16, [1, 1500, 5120]> linear_148_cast_fp16 = linear(bias = var_2758_to_fp16, weight = var_2757_to_fp16, x = var_2748_cast_fp16)[name = string("linear_148_cast_fp16")];
+            string x_305_mode_0 = const()[name = string("x_305_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_305_cast_fp16 = gelu(mode = x_305_mode_0, x = linear_148_cast_fp16)[name = string("x_305_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2763_to_fp16 = const()[name = string("op_2763_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(341072320)))];
+            tensor<fp16, [1280]> var_2764_to_fp16 = const()[name = string("op_2764_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354179584)))];
+            tensor<fp16, [1, 1500, 1280]> linear_149_cast_fp16 = linear(bias = var_2764_to_fp16, weight = var_2763_to_fp16, x = x_305_cast_fp16)[name = string("linear_149_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_307_cast_fp16 = add(x = x_301_cast_fp16, y = linear_149_cast_fp16)[name = string("x_307_cast_fp16")];
+            int32 var_2774 = const()[name = string("op_2774"), val = int32(-1)];
+            tensor<int32, [1]> var_2790_axes_0 = const()[name = string("op_2790_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354182208)))];
+            tensor<fp16, [1280]> blocks_25_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354184832)))];
+            fp16 var_2780_to_fp16 = const()[name = string("op_2780_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2790_cast_fp16 = layer_norm(axes = var_2790_axes_0, beta = blocks_25_attn_ln_bias_to_fp16, epsilon = var_2780_to_fp16, gamma = blocks_25_attn_ln_weight_to_fp16, x = x_307_cast_fp16)[name = string("op_2790_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2801_to_fp16 = const()[name = string("op_2801_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354187456)))];
+            tensor<fp16, [1280]> var_2802_to_fp16 = const()[name = string("op_2802_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(357464320)))];
+            tensor<fp16, [1, 1500, 1280]> linear_150_cast_fp16 = linear(bias = var_2802_to_fp16, weight = var_2801_to_fp16, x = var_2790_cast_fp16)[name = string("linear_150_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2805_to_fp16 = const()[name = string("op_2805_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(357466944)))];
+            tensor<fp16, [1, 1500, 1280]> linear_151_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2805_to_fp16, x = var_2790_cast_fp16)[name = string("linear_151_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(360743808)))];
+            tensor<fp16, [1280]> var_2810_to_fp16 = const()[name = string("op_2810_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(364020672)))];
+            tensor<fp16, [1, 1500, 1280]> linear_152_cast_fp16 = linear(bias = var_2810_to_fp16, weight = var_2809_to_fp16, x = var_2790_cast_fp16)[name = string("linear_152_cast_fp16")];
+            tensor<int32, [4]> var_2818 = const()[name = string("op_2818"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2819_cast_fp16 = reshape(shape = var_2818, x = linear_150_cast_fp16)[name = string("op_2819_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_103_cast_fp16 = mul(x = var_2819_cast_fp16, y = const_274_to_fp16)[name = string("q_103_cast_fp16")];
+            tensor<int32, [4]> var_2825 = const()[name = string("op_2825"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2826_cast_fp16 = reshape(shape = var_2825, x = linear_151_cast_fp16)[name = string("op_2826_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_275_to_fp16 = const()[name = string("const_275_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_103_cast_fp16 = mul(x = var_2826_cast_fp16, y = const_275_to_fp16)[name = string("k_103_cast_fp16")];
+            tensor<int32, [4]> var_2832 = const()[name = string("op_2832"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2833_cast_fp16 = reshape(shape = var_2832, x = linear_152_cast_fp16)[name = string("op_2833_cast_fp16")];
+            tensor<int32, [4]> var_2834 = const()[name = string("op_2834"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_51_transpose_x_0 = const()[name = string("qk_51_transpose_x_0"), val = bool(false)];
+            bool qk_51_transpose_y_0 = const()[name = string("qk_51_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_83 = transpose(perm = transpose_83_perm_0, x = k_103_cast_fp16)[name = string("transpose_122")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = q_103_cast_fp16)[name = string("transpose_123")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_51_cast_fp16 = matmul(transpose_x = qk_51_transpose_x_0, transpose_y = qk_51_transpose_y_0, x = transpose_82, y = transpose_83)[name = string("qk_51_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2838_cast_fp16 = softmax(axis = var_2774, x = qk_51_cast_fp16)[name = string("op_2838_cast_fp16")];
+            bool var_2840_transpose_x_0 = const()[name = string("op_2840_transpose_x_0"), val = bool(false)];
+            bool var_2840_transpose_y_0 = const()[name = string("op_2840_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_103_cast_fp16 = transpose(perm = var_2834, x = var_2833_cast_fp16)[name = string("transpose_121")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2840_cast_fp16 = matmul(transpose_x = var_2840_transpose_x_0, transpose_y = var_2840_transpose_y_0, x = var_2838_cast_fp16, y = v_103_cast_fp16)[name = string("op_2840_cast_fp16")];
+            tensor<int32, [4]> var_2841 = const()[name = string("op_2841"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_25 = const()[name = string("concat_25"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2842_cast_fp16 = transpose(perm = var_2841, x = var_2840_cast_fp16)[name = string("transpose_120")];
+            tensor<fp16, [1, 1500, 1280]> x_311_cast_fp16 = reshape(shape = concat_25, x = var_2842_cast_fp16)[name = string("x_311_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2846_to_fp16 = const()[name = string("op_2846_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(364023296)))];
+            tensor<fp16, [1280]> var_2847_to_fp16 = const()[name = string("op_2847_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367300160)))];
+            tensor<fp16, [1, 1500, 1280]> linear_153_cast_fp16 = linear(bias = var_2847_to_fp16, weight = var_2846_to_fp16, x = x_311_cast_fp16)[name = string("linear_153_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_313_cast_fp16 = add(x = x_307_cast_fp16, y = linear_153_cast_fp16)[name = string("x_313_cast_fp16")];
+            tensor<int32, [1]> var_2854_axes_0 = const()[name = string("op_2854_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_mlp_ln_weight_to_fp16 = const()[name = string("blocks_25_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367302784)))];
+            tensor<fp16, [1280]> blocks_25_mlp_ln_bias_to_fp16 = const()[name = string("blocks_25_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367305408)))];
+            tensor<fp16, [1, 1500, 1280]> var_2854_cast_fp16 = layer_norm(axes = var_2854_axes_0, beta = blocks_25_mlp_ln_bias_to_fp16, epsilon = var_2780_to_fp16, gamma = blocks_25_mlp_ln_weight_to_fp16, x = x_313_cast_fp16)[name = string("op_2854_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2863_to_fp16 = const()[name = string("op_2863_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367308032)))];
+            tensor<fp16, [5120]> var_2864_to_fp16 = const()[name = string("op_2864_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(380415296)))];
+            tensor<fp16, [1, 1500, 5120]> linear_154_cast_fp16 = linear(bias = var_2864_to_fp16, weight = var_2863_to_fp16, x = var_2854_cast_fp16)[name = string("linear_154_cast_fp16")];
+            string x_317_mode_0 = const()[name = string("x_317_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_317_cast_fp16 = gelu(mode = x_317_mode_0, x = linear_154_cast_fp16)[name = string("x_317_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2869_to_fp16 = const()[name = string("op_2869_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(380425600)))];
+            tensor<fp16, [1280]> var_2870_to_fp16 = const()[name = string("op_2870_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393532864)))];
+            tensor<fp16, [1, 1500, 1280]> linear_155_cast_fp16 = linear(bias = var_2870_to_fp16, weight = var_2869_to_fp16, x = x_317_cast_fp16)[name = string("linear_155_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_319_cast_fp16 = add(x = x_313_cast_fp16, y = linear_155_cast_fp16)[name = string("x_319_cast_fp16")];
+            int32 var_2880 = const()[name = string("op_2880"), val = int32(-1)];
+            tensor<int32, [1]> var_2896_axes_0 = const()[name = string("op_2896_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393535488)))];
+            tensor<fp16, [1280]> blocks_26_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393538112)))];
+            fp16 var_2886_to_fp16 = const()[name = string("op_2886_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2896_cast_fp16 = layer_norm(axes = var_2896_axes_0, beta = blocks_26_attn_ln_bias_to_fp16, epsilon = var_2886_to_fp16, gamma = blocks_26_attn_ln_weight_to_fp16, x = x_319_cast_fp16)[name = string("op_2896_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2907_to_fp16 = const()[name = string("op_2907_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393540736)))];
+            tensor<fp16, [1280]> var_2908_to_fp16 = const()[name = string("op_2908_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(396817600)))];
+            tensor<fp16, [1, 1500, 1280]> linear_156_cast_fp16 = linear(bias = var_2908_to_fp16, weight = var_2907_to_fp16, x = var_2896_cast_fp16)[name = string("linear_156_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2911_to_fp16 = const()[name = string("op_2911_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(396820224)))];
+            tensor<fp16, [1, 1500, 1280]> linear_157_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2911_to_fp16, x = var_2896_cast_fp16)[name = string("linear_157_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2915_to_fp16 = const()[name = string("op_2915_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(400097088)))];
+            tensor<fp16, [1280]> var_2916_to_fp16 = const()[name = string("op_2916_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(403373952)))];
+            tensor<fp16, [1, 1500, 1280]> linear_158_cast_fp16 = linear(bias = var_2916_to_fp16, weight = var_2915_to_fp16, x = var_2896_cast_fp16)[name = string("linear_158_cast_fp16")];
+            tensor<int32, [4]> var_2924 = const()[name = string("op_2924"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2925_cast_fp16 = reshape(shape = var_2924, x = linear_156_cast_fp16)[name = string("op_2925_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_107_cast_fp16 = mul(x = var_2925_cast_fp16, y = const_276_to_fp16)[name = string("q_107_cast_fp16")];
+            tensor<int32, [4]> var_2931 = const()[name = string("op_2931"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2932_cast_fp16 = reshape(shape = var_2931, x = linear_157_cast_fp16)[name = string("op_2932_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_107_cast_fp16 = mul(x = var_2932_cast_fp16, y = const_277_to_fp16)[name = string("k_107_cast_fp16")];
+            tensor<int32, [4]> var_2938 = const()[name = string("op_2938"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2939_cast_fp16 = reshape(shape = var_2938, x = linear_158_cast_fp16)[name = string("op_2939_cast_fp16")];
+            tensor<int32, [4]> var_2940 = const()[name = string("op_2940"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)];
+            bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_85 = transpose(perm = transpose_85_perm_0, x = k_107_cast_fp16)[name = string("transpose_118")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = q_107_cast_fp16)[name = string("transpose_119")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_84, y = transpose_85)[name = string("qk_53_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2944_cast_fp16 = softmax(axis = var_2880, x = qk_53_cast_fp16)[name = string("op_2944_cast_fp16")];
+            bool var_2946_transpose_x_0 = const()[name = string("op_2946_transpose_x_0"), val = bool(false)];
+            bool var_2946_transpose_y_0 = const()[name = string("op_2946_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_107_cast_fp16 = transpose(perm = var_2940, x = var_2939_cast_fp16)[name = string("transpose_117")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2946_cast_fp16 = matmul(transpose_x = var_2946_transpose_x_0, transpose_y = var_2946_transpose_y_0, x = var_2944_cast_fp16, y = v_107_cast_fp16)[name = string("op_2946_cast_fp16")];
+            tensor<int32, [4]> var_2947 = const()[name = string("op_2947"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_26 = const()[name = string("concat_26"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2948_cast_fp16 = transpose(perm = var_2947, x = var_2946_cast_fp16)[name = string("transpose_116")];
+            tensor<fp16, [1, 1500, 1280]> x_323_cast_fp16 = reshape(shape = concat_26, x = var_2948_cast_fp16)[name = string("x_323_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(403376576)))];
+            tensor<fp16, [1280]> var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406653440)))];
+            tensor<fp16, [1, 1500, 1280]> linear_159_cast_fp16 = linear(bias = var_2953_to_fp16, weight = var_2952_to_fp16, x = x_323_cast_fp16)[name = string("linear_159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_325_cast_fp16 = add(x = x_319_cast_fp16, y = linear_159_cast_fp16)[name = string("x_325_cast_fp16")];
+            tensor<int32, [1]> var_2960_axes_0 = const()[name = string("op_2960_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_mlp_ln_weight_to_fp16 = const()[name = string("blocks_26_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406656064)))];
+            tensor<fp16, [1280]> blocks_26_mlp_ln_bias_to_fp16 = const()[name = string("blocks_26_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406658688)))];
+            tensor<fp16, [1, 1500, 1280]> var_2960_cast_fp16 = layer_norm(axes = var_2960_axes_0, beta = blocks_26_mlp_ln_bias_to_fp16, epsilon = var_2886_to_fp16, gamma = blocks_26_mlp_ln_weight_to_fp16, x = x_325_cast_fp16)[name = string("op_2960_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2969_to_fp16 = const()[name = string("op_2969_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406661312)))];
+            tensor<fp16, [5120]> var_2970_to_fp16 = const()[name = string("op_2970_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(419768576)))];
+            tensor<fp16, [1, 1500, 5120]> linear_160_cast_fp16 = linear(bias = var_2970_to_fp16, weight = var_2969_to_fp16, x = var_2960_cast_fp16)[name = string("linear_160_cast_fp16")];
+            string x_329_mode_0 = const()[name = string("x_329_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_329_cast_fp16 = gelu(mode = x_329_mode_0, x = linear_160_cast_fp16)[name = string("x_329_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2975_to_fp16 = const()[name = string("op_2975_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(419778880)))];
+            tensor<fp16, [1280]> var_2976_to_fp16 = const()[name = string("op_2976_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432886144)))];
+            tensor<fp16, [1, 1500, 1280]> linear_161_cast_fp16 = linear(bias = var_2976_to_fp16, weight = var_2975_to_fp16, x = x_329_cast_fp16)[name = string("linear_161_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_331_cast_fp16 = add(x = x_325_cast_fp16, y = linear_161_cast_fp16)[name = string("x_331_cast_fp16")];
+            int32 var_2986 = const()[name = string("op_2986"), val = int32(-1)];
+            tensor<int32, [1]> var_3002_axes_0 = const()[name = string("op_3002_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432888768)))];
+            tensor<fp16, [1280]> blocks_27_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432891392)))];
+            fp16 var_2992_to_fp16 = const()[name = string("op_2992_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3002_cast_fp16 = layer_norm(axes = var_3002_axes_0, beta = blocks_27_attn_ln_bias_to_fp16, epsilon = var_2992_to_fp16, gamma = blocks_27_attn_ln_weight_to_fp16, x = x_331_cast_fp16)[name = string("op_3002_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3013_to_fp16 = const()[name = string("op_3013_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432894016)))];
+            tensor<fp16, [1280]> var_3014_to_fp16 = const()[name = string("op_3014_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(436170880)))];
+            tensor<fp16, [1, 1500, 1280]> linear_162_cast_fp16 = linear(bias = var_3014_to_fp16, weight = var_3013_to_fp16, x = var_3002_cast_fp16)[name = string("linear_162_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3017_to_fp16 = const()[name = string("op_3017_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(436173504)))];
+            tensor<fp16, [1, 1500, 1280]> linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3017_to_fp16, x = var_3002_cast_fp16)[name = string("linear_163_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3021_to_fp16 = const()[name = string("op_3021_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(439450368)))];
+            tensor<fp16, [1280]> var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(442727232)))];
+            tensor<fp16, [1, 1500, 1280]> linear_164_cast_fp16 = linear(bias = var_3022_to_fp16, weight = var_3021_to_fp16, x = var_3002_cast_fp16)[name = string("linear_164_cast_fp16")];
+            tensor<int32, [4]> var_3030 = const()[name = string("op_3030"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3031_cast_fp16 = reshape(shape = var_3030, x = linear_162_cast_fp16)[name = string("op_3031_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_278_to_fp16 = const()[name = string("const_278_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_111_cast_fp16 = mul(x = var_3031_cast_fp16, y = const_278_to_fp16)[name = string("q_111_cast_fp16")];
+            tensor<int32, [4]> var_3037 = const()[name = string("op_3037"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3038_cast_fp16 = reshape(shape = var_3037, x = linear_163_cast_fp16)[name = string("op_3038_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_111_cast_fp16 = mul(x = var_3038_cast_fp16, y = const_279_to_fp16)[name = string("k_111_cast_fp16")];
+            tensor<int32, [4]> var_3044 = const()[name = string("op_3044"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3045_cast_fp16 = reshape(shape = var_3044, x = linear_164_cast_fp16)[name = string("op_3045_cast_fp16")];
+            tensor<int32, [4]> var_3046 = const()[name = string("op_3046"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)];
+            bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_87 = transpose(perm = transpose_87_perm_0, x = k_111_cast_fp16)[name = string("transpose_114")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = q_111_cast_fp16)[name = string("transpose_115")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_86, y = transpose_87)[name = string("qk_55_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3050_cast_fp16 = softmax(axis = var_2986, x = qk_55_cast_fp16)[name = string("op_3050_cast_fp16")];
+            bool var_3052_transpose_x_0 = const()[name = string("op_3052_transpose_x_0"), val = bool(false)];
+            bool var_3052_transpose_y_0 = const()[name = string("op_3052_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_111_cast_fp16 = transpose(perm = var_3046, x = var_3045_cast_fp16)[name = string("transpose_113")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3052_cast_fp16 = matmul(transpose_x = var_3052_transpose_x_0, transpose_y = var_3052_transpose_y_0, x = var_3050_cast_fp16, y = v_111_cast_fp16)[name = string("op_3052_cast_fp16")];
+            tensor<int32, [4]> var_3053 = const()[name = string("op_3053"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_27 = const()[name = string("concat_27"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3054_cast_fp16 = transpose(perm = var_3053, x = var_3052_cast_fp16)[name = string("transpose_112")];
+            tensor<fp16, [1, 1500, 1280]> x_335_cast_fp16 = reshape(shape = concat_27, x = var_3054_cast_fp16)[name = string("x_335_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3058_to_fp16 = const()[name = string("op_3058_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(442729856)))];
+            tensor<fp16, [1280]> var_3059_to_fp16 = const()[name = string("op_3059_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446006720)))];
+            tensor<fp16, [1, 1500, 1280]> linear_165_cast_fp16 = linear(bias = var_3059_to_fp16, weight = var_3058_to_fp16, x = x_335_cast_fp16)[name = string("linear_165_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_337_cast_fp16 = add(x = x_331_cast_fp16, y = linear_165_cast_fp16)[name = string("x_337_cast_fp16")];
+            tensor<int32, [1]> var_3066_axes_0 = const()[name = string("op_3066_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_mlp_ln_weight_to_fp16 = const()[name = string("blocks_27_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446009344)))];
+            tensor<fp16, [1280]> blocks_27_mlp_ln_bias_to_fp16 = const()[name = string("blocks_27_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446011968)))];
+            tensor<fp16, [1, 1500, 1280]> var_3066_cast_fp16 = layer_norm(axes = var_3066_axes_0, beta = blocks_27_mlp_ln_bias_to_fp16, epsilon = var_2992_to_fp16, gamma = blocks_27_mlp_ln_weight_to_fp16, x = x_337_cast_fp16)[name = string("op_3066_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3075_to_fp16 = const()[name = string("op_3075_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446014592)))];
+            tensor<fp16, [5120]> var_3076_to_fp16 = const()[name = string("op_3076_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(459121856)))];
+            tensor<fp16, [1, 1500, 5120]> linear_166_cast_fp16 = linear(bias = var_3076_to_fp16, weight = var_3075_to_fp16, x = var_3066_cast_fp16)[name = string("linear_166_cast_fp16")];
+            string x_341_mode_0 = const()[name = string("x_341_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_341_cast_fp16 = gelu(mode = x_341_mode_0, x = linear_166_cast_fp16)[name = string("x_341_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3081_to_fp16 = const()[name = string("op_3081_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(459132160)))];
+            tensor<fp16, [1280]> var_3082_to_fp16 = const()[name = string("op_3082_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472239424)))];
+            tensor<fp16, [1, 1500, 1280]> linear_167_cast_fp16 = linear(bias = var_3082_to_fp16, weight = var_3081_to_fp16, x = x_341_cast_fp16)[name = string("linear_167_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_343_cast_fp16 = add(x = x_337_cast_fp16, y = linear_167_cast_fp16)[name = string("x_343_cast_fp16")];
+            int32 var_3092 = const()[name = string("op_3092"), val = int32(-1)];
+            tensor<int32, [1]> var_3108_axes_0 = const()[name = string("op_3108_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472242048)))];
+            tensor<fp16, [1280]> blocks_28_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472244672)))];
+            fp16 var_3098_to_fp16 = const()[name = string("op_3098_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3108_cast_fp16 = layer_norm(axes = var_3108_axes_0, beta = blocks_28_attn_ln_bias_to_fp16, epsilon = var_3098_to_fp16, gamma = blocks_28_attn_ln_weight_to_fp16, x = x_343_cast_fp16)[name = string("op_3108_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3119_to_fp16 = const()[name = string("op_3119_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472247296)))];
+            tensor<fp16, [1280]> var_3120_to_fp16 = const()[name = string("op_3120_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(475524160)))];
+            tensor<fp16, [1, 1500, 1280]> linear_168_cast_fp16 = linear(bias = var_3120_to_fp16, weight = var_3119_to_fp16, x = var_3108_cast_fp16)[name = string("linear_168_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3123_to_fp16 = const()[name = string("op_3123_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(475526784)))];
+            tensor<fp16, [1, 1500, 1280]> linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3123_to_fp16, x = var_3108_cast_fp16)[name = string("linear_169_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3127_to_fp16 = const()[name = string("op_3127_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(478803648)))];
+            tensor<fp16, [1280]> var_3128_to_fp16 = const()[name = string("op_3128_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(482080512)))];
+            tensor<fp16, [1, 1500, 1280]> linear_170_cast_fp16 = linear(bias = var_3128_to_fp16, weight = var_3127_to_fp16, x = var_3108_cast_fp16)[name = string("linear_170_cast_fp16")];
+            tensor<int32, [4]> var_3136 = const()[name = string("op_3136"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3137_cast_fp16 = reshape(shape = var_3136, x = linear_168_cast_fp16)[name = string("op_3137_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_115_cast_fp16 = mul(x = var_3137_cast_fp16, y = const_280_to_fp16)[name = string("q_115_cast_fp16")];
+            tensor<int32, [4]> var_3143 = const()[name = string("op_3143"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3144_cast_fp16 = reshape(shape = var_3143, x = linear_169_cast_fp16)[name = string("op_3144_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_115_cast_fp16 = mul(x = var_3144_cast_fp16, y = const_281_to_fp16)[name = string("k_115_cast_fp16")];
+            tensor<int32, [4]> var_3150 = const()[name = string("op_3150"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3151_cast_fp16 = reshape(shape = var_3150, x = linear_170_cast_fp16)[name = string("op_3151_cast_fp16")];
+            tensor<int32, [4]> var_3152 = const()[name = string("op_3152"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_57_transpose_x_0 = const()[name = string("qk_57_transpose_x_0"), val = bool(false)];
+            bool qk_57_transpose_y_0 = const()[name = string("qk_57_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_89 = transpose(perm = transpose_89_perm_0, x = k_115_cast_fp16)[name = string("transpose_110")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = q_115_cast_fp16)[name = string("transpose_111")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_57_cast_fp16 = matmul(transpose_x = qk_57_transpose_x_0, transpose_y = qk_57_transpose_y_0, x = transpose_88, y = transpose_89)[name = string("qk_57_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3156_cast_fp16 = softmax(axis = var_3092, x = qk_57_cast_fp16)[name = string("op_3156_cast_fp16")];
+            bool var_3158_transpose_x_0 = const()[name = string("op_3158_transpose_x_0"), val = bool(false)];
+            bool var_3158_transpose_y_0 = const()[name = string("op_3158_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_115_cast_fp16 = transpose(perm = var_3152, x = var_3151_cast_fp16)[name = string("transpose_109")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3158_cast_fp16 = matmul(transpose_x = var_3158_transpose_x_0, transpose_y = var_3158_transpose_y_0, x = var_3156_cast_fp16, y = v_115_cast_fp16)[name = string("op_3158_cast_fp16")];
+            tensor<int32, [4]> var_3159 = const()[name = string("op_3159"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_28 = const()[name = string("concat_28"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3160_cast_fp16 = transpose(perm = var_3159, x = var_3158_cast_fp16)[name = string("transpose_108")];
+            tensor<fp16, [1, 1500, 1280]> x_347_cast_fp16 = reshape(shape = concat_28, x = var_3160_cast_fp16)[name = string("x_347_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(482083136)))];
+            tensor<fp16, [1280]> var_3165_to_fp16 = const()[name = string("op_3165_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485360000)))];
+            tensor<fp16, [1, 1500, 1280]> linear_171_cast_fp16 = linear(bias = var_3165_to_fp16, weight = var_3164_to_fp16, x = x_347_cast_fp16)[name = string("linear_171_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_349_cast_fp16 = add(x = x_343_cast_fp16, y = linear_171_cast_fp16)[name = string("x_349_cast_fp16")];
+            tensor<int32, [1]> var_3172_axes_0 = const()[name = string("op_3172_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_mlp_ln_weight_to_fp16 = const()[name = string("blocks_28_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485362624)))];
+            tensor<fp16, [1280]> blocks_28_mlp_ln_bias_to_fp16 = const()[name = string("blocks_28_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485365248)))];
+            tensor<fp16, [1, 1500, 1280]> var_3172_cast_fp16 = layer_norm(axes = var_3172_axes_0, beta = blocks_28_mlp_ln_bias_to_fp16, epsilon = var_3098_to_fp16, gamma = blocks_28_mlp_ln_weight_to_fp16, x = x_349_cast_fp16)[name = string("op_3172_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3181_to_fp16 = const()[name = string("op_3181_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485367872)))];
+            tensor<fp16, [5120]> var_3182_to_fp16 = const()[name = string("op_3182_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(498475136)))];
+            tensor<fp16, [1, 1500, 5120]> linear_172_cast_fp16 = linear(bias = var_3182_to_fp16, weight = var_3181_to_fp16, x = var_3172_cast_fp16)[name = string("linear_172_cast_fp16")];
+            string x_353_mode_0 = const()[name = string("x_353_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_353_cast_fp16 = gelu(mode = x_353_mode_0, x = linear_172_cast_fp16)[name = string("x_353_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3187_to_fp16 = const()[name = string("op_3187_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(498485440)))];
+            tensor<fp16, [1280]> var_3188_to_fp16 = const()[name = string("op_3188_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511592704)))];
+            tensor<fp16, [1, 1500, 1280]> linear_173_cast_fp16 = linear(bias = var_3188_to_fp16, weight = var_3187_to_fp16, x = x_353_cast_fp16)[name = string("linear_173_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_355_cast_fp16 = add(x = x_349_cast_fp16, y = linear_173_cast_fp16)[name = string("x_355_cast_fp16")];
+            int32 var_3198 = const()[name = string("op_3198"), val = int32(-1)];
+            tensor<int32, [1]> var_3214_axes_0 = const()[name = string("op_3214_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511595328)))];
+            tensor<fp16, [1280]> blocks_29_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511597952)))];
+            fp16 var_3204_to_fp16 = const()[name = string("op_3204_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3214_cast_fp16 = layer_norm(axes = var_3214_axes_0, beta = blocks_29_attn_ln_bias_to_fp16, epsilon = var_3204_to_fp16, gamma = blocks_29_attn_ln_weight_to_fp16, x = x_355_cast_fp16)[name = string("op_3214_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3225_to_fp16 = const()[name = string("op_3225_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511600576)))];
+            tensor<fp16, [1280]> var_3226_to_fp16 = const()[name = string("op_3226_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(514877440)))];
+            tensor<fp16, [1, 1500, 1280]> linear_174_cast_fp16 = linear(bias = var_3226_to_fp16, weight = var_3225_to_fp16, x = var_3214_cast_fp16)[name = string("linear_174_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3229_to_fp16 = const()[name = string("op_3229_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(514880064)))];
+            tensor<fp16, [1, 1500, 1280]> linear_175_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3229_to_fp16, x = var_3214_cast_fp16)[name = string("linear_175_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3233_to_fp16 = const()[name = string("op_3233_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(518156928)))];
+            tensor<fp16, [1280]> var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(521433792)))];
+            tensor<fp16, [1, 1500, 1280]> linear_176_cast_fp16 = linear(bias = var_3234_to_fp16, weight = var_3233_to_fp16, x = var_3214_cast_fp16)[name = string("linear_176_cast_fp16")];
+            tensor<int32, [4]> var_3242 = const()[name = string("op_3242"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3243_cast_fp16 = reshape(shape = var_3242, x = linear_174_cast_fp16)[name = string("op_3243_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_119_cast_fp16 = mul(x = var_3243_cast_fp16, y = const_282_to_fp16)[name = string("q_119_cast_fp16")];
+            tensor<int32, [4]> var_3249 = const()[name = string("op_3249"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3250_cast_fp16 = reshape(shape = var_3249, x = linear_175_cast_fp16)[name = string("op_3250_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_119_cast_fp16 = mul(x = var_3250_cast_fp16, y = const_283_to_fp16)[name = string("k_119_cast_fp16")];
+            tensor<int32, [4]> var_3256 = const()[name = string("op_3256"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3257_cast_fp16 = reshape(shape = var_3256, x = linear_176_cast_fp16)[name = string("op_3257_cast_fp16")];
+            tensor<int32, [4]> var_3258 = const()[name = string("op_3258"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)];
+            bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_91 = transpose(perm = transpose_91_perm_0, x = k_119_cast_fp16)[name = string("transpose_106")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = q_119_cast_fp16)[name = string("transpose_107")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_90, y = transpose_91)[name = string("qk_59_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3262_cast_fp16 = softmax(axis = var_3198, x = qk_59_cast_fp16)[name = string("op_3262_cast_fp16")];
+            bool var_3264_transpose_x_0 = const()[name = string("op_3264_transpose_x_0"), val = bool(false)];
+            bool var_3264_transpose_y_0 = const()[name = string("op_3264_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_119_cast_fp16 = transpose(perm = var_3258, x = var_3257_cast_fp16)[name = string("transpose_105")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3264_cast_fp16 = matmul(transpose_x = var_3264_transpose_x_0, transpose_y = var_3264_transpose_y_0, x = var_3262_cast_fp16, y = v_119_cast_fp16)[name = string("op_3264_cast_fp16")];
+            tensor<int32, [4]> var_3265 = const()[name = string("op_3265"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_29 = const()[name = string("concat_29"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3266_cast_fp16 = transpose(perm = var_3265, x = var_3264_cast_fp16)[name = string("transpose_104")];
+            tensor<fp16, [1, 1500, 1280]> x_359_cast_fp16 = reshape(shape = concat_29, x = var_3266_cast_fp16)[name = string("x_359_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(521436416)))];
+            tensor<fp16, [1280]> var_3271_to_fp16 = const()[name = string("op_3271_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524713280)))];
+            tensor<fp16, [1, 1500, 1280]> linear_177_cast_fp16 = linear(bias = var_3271_to_fp16, weight = var_3270_to_fp16, x = x_359_cast_fp16)[name = string("linear_177_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_361_cast_fp16 = add(x = x_355_cast_fp16, y = linear_177_cast_fp16)[name = string("x_361_cast_fp16")];
+            tensor<int32, [1]> var_3278_axes_0 = const()[name = string("op_3278_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_mlp_ln_weight_to_fp16 = const()[name = string("blocks_29_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524715904)))];
+            tensor<fp16, [1280]> blocks_29_mlp_ln_bias_to_fp16 = const()[name = string("blocks_29_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524718528)))];
+            tensor<fp16, [1, 1500, 1280]> var_3278_cast_fp16 = layer_norm(axes = var_3278_axes_0, beta = blocks_29_mlp_ln_bias_to_fp16, epsilon = var_3204_to_fp16, gamma = blocks_29_mlp_ln_weight_to_fp16, x = x_361_cast_fp16)[name = string("op_3278_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3287_to_fp16 = const()[name = string("op_3287_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524721152)))];
+            tensor<fp16, [5120]> var_3288_to_fp16 = const()[name = string("op_3288_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(537828416)))];
+            tensor<fp16, [1, 1500, 5120]> linear_178_cast_fp16 = linear(bias = var_3288_to_fp16, weight = var_3287_to_fp16, x = var_3278_cast_fp16)[name = string("linear_178_cast_fp16")];
+            string x_365_mode_0 = const()[name = string("x_365_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_365_cast_fp16 = gelu(mode = x_365_mode_0, x = linear_178_cast_fp16)[name = string("x_365_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3293_to_fp16 = const()[name = string("op_3293_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(537838720)))];
+            tensor<fp16, [1280]> var_3294_to_fp16 = const()[name = string("op_3294_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550945984)))];
+            tensor<fp16, [1, 1500, 1280]> linear_179_cast_fp16 = linear(bias = var_3294_to_fp16, weight = var_3293_to_fp16, x = x_365_cast_fp16)[name = string("linear_179_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_367_cast_fp16 = add(x = x_361_cast_fp16, y = linear_179_cast_fp16)[name = string("x_367_cast_fp16")];
+            int32 var_3304 = const()[name = string("op_3304"), val = int32(-1)];
+            tensor<int32, [1]> var_3320_axes_0 = const()[name = string("op_3320_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550948608)))];
+            tensor<fp16, [1280]> blocks_30_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550951232)))];
+            fp16 var_3310_to_fp16 = const()[name = string("op_3310_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3320_cast_fp16 = layer_norm(axes = var_3320_axes_0, beta = blocks_30_attn_ln_bias_to_fp16, epsilon = var_3310_to_fp16, gamma = blocks_30_attn_ln_weight_to_fp16, x = x_367_cast_fp16)[name = string("op_3320_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3331_to_fp16 = const()[name = string("op_3331_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550953856)))];
+            tensor<fp16, [1280]> var_3332_to_fp16 = const()[name = string("op_3332_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(554230720)))];
+            tensor<fp16, [1, 1500, 1280]> linear_180_cast_fp16 = linear(bias = var_3332_to_fp16, weight = var_3331_to_fp16, x = var_3320_cast_fp16)[name = string("linear_180_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3335_to_fp16 = const()[name = string("op_3335_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(554233344)))];
+            tensor<fp16, [1, 1500, 1280]> linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3335_to_fp16, x = var_3320_cast_fp16)[name = string("linear_181_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3339_to_fp16 = const()[name = string("op_3339_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(557510208)))];
+            tensor<fp16, [1280]> var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(560787072)))];
+            tensor<fp16, [1, 1500, 1280]> linear_182_cast_fp16 = linear(bias = var_3340_to_fp16, weight = var_3339_to_fp16, x = var_3320_cast_fp16)[name = string("linear_182_cast_fp16")];
+            tensor<int32, [4]> var_3348 = const()[name = string("op_3348"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3349_cast_fp16 = reshape(shape = var_3348, x = linear_180_cast_fp16)[name = string("op_3349_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_284_to_fp16 = const()[name = string("const_284_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_123_cast_fp16 = mul(x = var_3349_cast_fp16, y = const_284_to_fp16)[name = string("q_123_cast_fp16")];
+            tensor<int32, [4]> var_3355 = const()[name = string("op_3355"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3356_cast_fp16 = reshape(shape = var_3355, x = linear_181_cast_fp16)[name = string("op_3356_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_123_cast_fp16 = mul(x = var_3356_cast_fp16, y = const_285_to_fp16)[name = string("k_123_cast_fp16")];
+            tensor<int32, [4]> var_3362 = const()[name = string("op_3362"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3363_cast_fp16 = reshape(shape = var_3362, x = linear_182_cast_fp16)[name = string("op_3363_cast_fp16")];
+            tensor<int32, [4]> var_3364 = const()[name = string("op_3364"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)];
+            bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_93 = transpose(perm = transpose_93_perm_0, x = k_123_cast_fp16)[name = string("transpose_102")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = q_123_cast_fp16)[name = string("transpose_103")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_92, y = transpose_93)[name = string("qk_61_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3368_cast_fp16 = softmax(axis = var_3304, x = qk_61_cast_fp16)[name = string("op_3368_cast_fp16")];
+            bool var_3370_transpose_x_0 = const()[name = string("op_3370_transpose_x_0"), val = bool(false)];
+            bool var_3370_transpose_y_0 = const()[name = string("op_3370_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_123_cast_fp16 = transpose(perm = var_3364, x = var_3363_cast_fp16)[name = string("transpose_101")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3370_cast_fp16 = matmul(transpose_x = var_3370_transpose_x_0, transpose_y = var_3370_transpose_y_0, x = var_3368_cast_fp16, y = v_123_cast_fp16)[name = string("op_3370_cast_fp16")];
+            tensor<int32, [4]> var_3371 = const()[name = string("op_3371"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_30 = const()[name = string("concat_30"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3372_cast_fp16 = transpose(perm = var_3371, x = var_3370_cast_fp16)[name = string("transpose_100")];
+            tensor<fp16, [1, 1500, 1280]> x_371_cast_fp16 = reshape(shape = concat_30, x = var_3372_cast_fp16)[name = string("x_371_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3376_to_fp16 = const()[name = string("op_3376_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(560789696)))];
+            tensor<fp16, [1280]> var_3377_to_fp16 = const()[name = string("op_3377_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564066560)))];
+            tensor<fp16, [1, 1500, 1280]> linear_183_cast_fp16 = linear(bias = var_3377_to_fp16, weight = var_3376_to_fp16, x = x_371_cast_fp16)[name = string("linear_183_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_373_cast_fp16 = add(x = x_367_cast_fp16, y = linear_183_cast_fp16)[name = string("x_373_cast_fp16")];
+            tensor<int32, [1]> var_3384_axes_0 = const()[name = string("op_3384_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_mlp_ln_weight_to_fp16 = const()[name = string("blocks_30_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564069184)))];
+            tensor<fp16, [1280]> blocks_30_mlp_ln_bias_to_fp16 = const()[name = string("blocks_30_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564071808)))];
+            tensor<fp16, [1, 1500, 1280]> var_3384_cast_fp16 = layer_norm(axes = var_3384_axes_0, beta = blocks_30_mlp_ln_bias_to_fp16, epsilon = var_3310_to_fp16, gamma = blocks_30_mlp_ln_weight_to_fp16, x = x_373_cast_fp16)[name = string("op_3384_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3393_to_fp16 = const()[name = string("op_3393_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564074432)))];
+            tensor<fp16, [5120]> var_3394_to_fp16 = const()[name = string("op_3394_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(577181696)))];
+            tensor<fp16, [1, 1500, 5120]> linear_184_cast_fp16 = linear(bias = var_3394_to_fp16, weight = var_3393_to_fp16, x = var_3384_cast_fp16)[name = string("linear_184_cast_fp16")];
+            string x_377_mode_0 = const()[name = string("x_377_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_377_cast_fp16 = gelu(mode = x_377_mode_0, x = linear_184_cast_fp16)[name = string("x_377_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3399_to_fp16 = const()[name = string("op_3399_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(577192000)))];
+            tensor<fp16, [1280]> var_3400_to_fp16 = const()[name = string("op_3400_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590299264)))];
+            tensor<fp16, [1, 1500, 1280]> linear_185_cast_fp16 = linear(bias = var_3400_to_fp16, weight = var_3399_to_fp16, x = x_377_cast_fp16)[name = string("linear_185_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_379_cast_fp16 = add(x = x_373_cast_fp16, y = linear_185_cast_fp16)[name = string("x_379_cast_fp16")];
+            int32 var_3410 = const()[name = string("op_3410"), val = int32(-1)];
+            tensor<int32, [1]> var_3426_axes_0 = const()[name = string("op_3426_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590301888)))];
+            tensor<fp16, [1280]> blocks_31_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590304512)))];
+            fp16 var_3416_to_fp16 = const()[name = string("op_3416_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3426_cast_fp16 = layer_norm(axes = var_3426_axes_0, beta = blocks_31_attn_ln_bias_to_fp16, epsilon = var_3416_to_fp16, gamma = blocks_31_attn_ln_weight_to_fp16, x = x_379_cast_fp16)[name = string("op_3426_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3437_to_fp16 = const()[name = string("op_3437_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590307136)))];
+            tensor<fp16, [1280]> var_3438_to_fp16 = const()[name = string("op_3438_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(593584000)))];
+            tensor<fp16, [1, 1500, 1280]> linear_186_cast_fp16 = linear(bias = var_3438_to_fp16, weight = var_3437_to_fp16, x = var_3426_cast_fp16)[name = string("linear_186_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3441_to_fp16 = const()[name = string("op_3441_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(593586624)))];
+            tensor<fp16, [1, 1500, 1280]> linear_187_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3441_to_fp16, x = var_3426_cast_fp16)[name = string("linear_187_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(596863488)))];
+            tensor<fp16, [1280]> var_3446_to_fp16 = const()[name = string("op_3446_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(600140352)))];
+            tensor<fp16, [1, 1500, 1280]> linear_188_cast_fp16 = linear(bias = var_3446_to_fp16, weight = var_3445_to_fp16, x = var_3426_cast_fp16)[name = string("linear_188_cast_fp16")];
+            tensor<int32, [4]> var_3454 = const()[name = string("op_3454"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3455_cast_fp16 = reshape(shape = var_3454, x = linear_186_cast_fp16)[name = string("op_3455_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_286_to_fp16 = const()[name = string("const_286_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_cast_fp16 = mul(x = var_3455_cast_fp16, y = const_286_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_3461 = const()[name = string("op_3461"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3462_cast_fp16 = reshape(shape = var_3461, x = linear_187_cast_fp16)[name = string("op_3462_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_cast_fp16 = mul(x = var_3462_cast_fp16, y = const_287_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_3468 = const()[name = string("op_3468"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3469_cast_fp16 = reshape(shape = var_3468, x = linear_188_cast_fp16)[name = string("op_3469_cast_fp16")];
+            tensor<int32, [4]> var_3470 = const()[name = string("op_3470"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_95 = transpose(perm = transpose_95_perm_0, x = k_cast_fp16)[name = string("transpose_98")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = q_cast_fp16)[name = string("transpose_99")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_94, y = transpose_95)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3474_cast_fp16 = softmax(axis = var_3410, x = qk_cast_fp16)[name = string("op_3474_cast_fp16")];
+            bool var_3476_transpose_x_0 = const()[name = string("op_3476_transpose_x_0"), val = bool(false)];
+            bool var_3476_transpose_y_0 = const()[name = string("op_3476_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_cast_fp16 = transpose(perm = var_3470, x = var_3469_cast_fp16)[name = string("transpose_97")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3476_cast_fp16 = matmul(transpose_x = var_3476_transpose_x_0, transpose_y = var_3476_transpose_y_0, x = var_3474_cast_fp16, y = v_cast_fp16)[name = string("op_3476_cast_fp16")];
+            tensor<int32, [4]> var_3477 = const()[name = string("op_3477"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_31 = const()[name = string("concat_31"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3478_cast_fp16 = transpose(perm = var_3477, x = var_3476_cast_fp16)[name = string("transpose_96")];
+            tensor<fp16, [1, 1500, 1280]> x_383_cast_fp16 = reshape(shape = concat_31, x = var_3478_cast_fp16)[name = string("x_383_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3482_to_fp16 = const()[name = string("op_3482_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(600142976)))];
+            tensor<fp16, [1280]> var_3483_to_fp16 = const()[name = string("op_3483_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603419840)))];
+            tensor<fp16, [1, 1500, 1280]> linear_189_cast_fp16 = linear(bias = var_3483_to_fp16, weight = var_3482_to_fp16, x = x_383_cast_fp16)[name = string("linear_189_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_385_cast_fp16 = add(x = x_379_cast_fp16, y = linear_189_cast_fp16)[name = string("x_385_cast_fp16")];
+            tensor<int32, [1]> var_3490_axes_0 = const()[name = string("op_3490_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_mlp_ln_weight_to_fp16 = const()[name = string("blocks_31_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603422464)))];
+            tensor<fp16, [1280]> blocks_31_mlp_ln_bias_to_fp16 = const()[name = string("blocks_31_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603425088)))];
+            tensor<fp16, [1, 1500, 1280]> var_3490_cast_fp16 = layer_norm(axes = var_3490_axes_0, beta = blocks_31_mlp_ln_bias_to_fp16, epsilon = var_3416_to_fp16, gamma = blocks_31_mlp_ln_weight_to_fp16, x = x_385_cast_fp16)[name = string("op_3490_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603427712)))];
+            tensor<fp16, [5120]> var_3500_to_fp16 = const()[name = string("op_3500_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(616534976)))];
+            tensor<fp16, [1, 1500, 5120]> linear_190_cast_fp16 = linear(bias = var_3500_to_fp16, weight = var_3499_to_fp16, x = var_3490_cast_fp16)[name = string("linear_190_cast_fp16")];
+            string x_389_mode_0 = const()[name = string("x_389_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_389_cast_fp16 = gelu(mode = x_389_mode_0, x = linear_190_cast_fp16)[name = string("x_389_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(616545280)))];
+            tensor<fp16, [1280]> var_3506_to_fp16 = const()[name = string("op_3506_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629652544)))];
+            tensor<fp16, [1, 1500, 1280]> linear_191_cast_fp16 = linear(bias = var_3506_to_fp16, weight = var_3505_to_fp16, x = x_389_cast_fp16)[name = string("linear_191_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_cast_fp16 = add(x = x_385_cast_fp16, y = linear_191_cast_fp16)[name = string("x_cast_fp16")];
+            tensor<int32, [1]> var_3519_axes_0 = const()[name = string("op_3519_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629655168)))];
+            tensor<fp16, [1280]> ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629657792)))];
+            fp16 var_3510_to_fp16 = const()[name = string("op_3510_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> output = layer_norm(axes = var_3519_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_3510_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_3519_cast_fp16")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/large-v2/encoder.mlmodelc/model1/weights/1-weight.bin b/large-v2/encoder.mlmodelc/model1/weights/1-weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..428c534992e427a9b38c763e4c3feb452ac04feb
--- /dev/null
+++ b/large-v2/encoder.mlmodelc/model1/weights/1-weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b18deffd43b1f394f0f9d6434ef3e042c9e0424f8b590891a5cb0c21e4951163
+size 629660416
diff --git a/large-v2/model_dims.json b/large-v2/model_dims.json
new file mode 100644
index 0000000000000000000000000000000000000000..e22ceb62f4ffcdfe89361b7377ef1300f531463c
--- /dev/null
+++ b/large-v2/model_dims.json
@@ -0,0 +1,12 @@
+{
+  "n_mels": 80,
+  "n_audio_ctx": 1500,
+  "n_audio_state": 1280,
+  "n_audio_head": 20,
+  "n_audio_layer": 32,
+  "n_vocab": 51865,
+  "n_text_ctx": 448,
+  "n_text_state": 1280,
+  "n_text_head": 20,
+  "n_text_layer": 32
+}
\ No newline at end of file
diff --git a/large-v3/decoder_first.mlmodelc/analytics/coremldata.bin b/large-v3/decoder_first.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b3502c1971106c8ddba15a6d19cbe212e9040b51
--- /dev/null
+++ b/large-v3/decoder_first.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a564dfd67cfcb3c0ee8cd9f7ef9f303fbfc561e635709bd3a46c5870571079de
+size 243
diff --git a/large-v3/decoder_first.mlmodelc/coremldata.bin b/large-v3/decoder_first.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..78fa71791f49b098c63687ec844348e5cd25cd92
--- /dev/null
+++ b/large-v3/decoder_first.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6214be9e110a102836fb1fdb960a2fb564e60f5d9e3d1e25a9b7f978309480e
+size 453
diff --git a/large-v3/decoder_first.mlmodelc/metadata.json b/large-v3/decoder_first.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..62548d3742d04f712f1bad76294f859bb5029d22
--- /dev/null
+++ b/large-v3/decoder_first.mlmodelc/metadata.json
@@ -0,0 +1,106 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "dummy",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.writeState" : 66,
+      "Shape" : 64,
+      "Ios18.linear" : 64,
+      "Identity" : 1,
+      "Ios18.gather" : 64,
+      "Ios18.concat" : 64,
+      "Ios18.sliceUpdate" : 66,
+      "Ios18.cast" : 128,
+      "Ios18.expandDims" : 64,
+      "Ios18.readState" : 66
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 448 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 448, 1280]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 448 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 448, 1280]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 1500, 1280]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 1500, 1280]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...1500 × 1280",
+        "shapeRange" : "[[1, 1], [1, 1500], [1280, 1280]]",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1280)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1, 1280]",
+        "name" : "audio_data",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_first",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/large-v3/decoder_first.mlmodelc/model.mil b/large-v3/decoder_first.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..5e9505ec80acb3d396de560006ff76f4da79cc6a
--- /dev/null
+++ b/large-v3/decoder_first.mlmodelc/model.mil
@@ -0,0 +1,1851 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, ?, 1280]> audio_data, state<tensor<fp16, [32, 1, 448, 1280]>> k_cache1, state<tensor<fp16, [32, 1, 1500, 1280]>> k_cache2, state<tensor<fp16, [32, 1, 448, 1280]>> v_cache1, state<tensor<fp16, [32, 1, 1500, 1280]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 1280]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [1280, 1280]]}})))] {
+            tensor<fp16, [1, ?, 1280]> dummy = identity(x = audio_data)[name = string("identity_0")];
+            tensor<fp16, [32, 1, 448, 1280]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor<fp16, [32, 1, 448, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<fp16, [32, 1, 1500, 1280]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<fp16, [1280, 1280]> var_131_to_fp16 = const()[name = string("op_131_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36700288)))];
+            tensor<fp16, [1280]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39977152)))];
+            tensor<fp16, [1, ?, 1280]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_131_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_135_to_fp16 = const()[name = string("op_135_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39979776)))];
+            tensor<fp16, [1280]> var_136_to_fp16 = const()[name = string("op_136_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43256640)))];
+            tensor<fp16, [1, ?, 1280]> linear_1_cast_fp16 = linear(bias = var_136_to_fp16, weight = var_135_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [3]> var_138_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_138_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_138_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_138_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [3]> var_138_shape_cast_fp16_to_int16 = cast(dtype = var_138_shape_cast_fp16_to_int16_dtype_0, x = var_138_shape_cast_fp16)[name = string("cast_199")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_138_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_198")];
+            tensor<int32, [1]> expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")];
+            tensor<int32, [4]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
+            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_68_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_68 = read_state(input = k_cache2)[name = string("coreml_update_state_68")];
+            tensor<int32, [3]> var_143_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_143_shape_cast_fp16")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_143_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_143_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_143_shape_cast_fp16_to_uint16 = cast(dtype = var_143_shape_cast_fp16_to_uint16_dtype_0, x = var_143_shape_cast_fp16)[name = string("cast_197")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_143_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_196")];
+            tensor<int32, [1]> expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")];
+            tensor<int32, [4]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)];
+            bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_69_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_69 = read_state(input = v_cache2)[name = string("coreml_update_state_69")];
+            tensor<fp16, [1280, 1280]> var_165_to_fp16 = const()[name = string("op_165_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43259264)))];
+            tensor<fp16, [1, ?, 1280]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_165_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_169_to_fp16 = const()[name = string("op_169_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46536128)))];
+            tensor<fp16, [1280]> var_170_to_fp16 = const()[name = string("op_170_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49812992)))];
+            tensor<fp16, [1, ?, 1280]> linear_3_cast_fp16 = linear(bias = var_170_to_fp16, weight = var_169_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")];
+            tensor<int32, [3]> var_172_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_172_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_172_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_172_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_172_shape_cast_fp16_to_uint16 = cast(dtype = var_172_shape_cast_fp16_to_uint16_dtype_0, x = var_172_shape_cast_fp16)[name = string("cast_195")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_172_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_194")];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")];
+            tensor<int32, [4]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)];
+            bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_68)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_70_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_70 = read_state(input = k_cache2)[name = string("coreml_update_state_70")];
+            tensor<int32, [3]> var_177_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_177_shape_cast_fp16")];
+            int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)];
+            int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)];
+            bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)];
+            string var_177_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_177_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_177_shape_cast_fp16_to_uint16 = cast(dtype = var_177_shape_cast_fp16_to_uint16_dtype_0, x = var_177_shape_cast_fp16)[name = string("cast_193")];
+            uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_177_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")];
+            string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_192")];
+            tensor<int32, [1]> expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")];
+            tensor<int32, [4]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_69)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_71_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_71 = read_state(input = v_cache2)[name = string("coreml_update_state_71")];
+            tensor<fp16, [1280, 1280]> var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49815616)))];
+            tensor<fp16, [1, ?, 1280]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_199_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_203_to_fp16 = const()[name = string("op_203_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53092480)))];
+            tensor<fp16, [1280]> var_204_to_fp16 = const()[name = string("op_204_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56369344)))];
+            tensor<fp16, [1, ?, 1280]> linear_5_cast_fp16 = linear(bias = var_204_to_fp16, weight = var_203_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")];
+            tensor<int32, [3]> var_206_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_206_shape_cast_fp16")];
+            int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)];
+            int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)];
+            bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)];
+            string var_206_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_206_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_206_shape_cast_fp16_to_uint16 = cast(dtype = var_206_shape_cast_fp16_to_uint16_dtype_0, x = var_206_shape_cast_fp16)[name = string("cast_191")];
+            uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_206_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")];
+            string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_190")];
+            tensor<int32, [1]> expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")];
+            tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)];
+            bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_70)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_72_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_72 = read_state(input = k_cache2)[name = string("coreml_update_state_72")];
+            tensor<int32, [3]> var_211_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_211_shape_cast_fp16")];
+            int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)];
+            int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)];
+            bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)];
+            string var_211_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_211_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_211_shape_cast_fp16_to_uint16 = cast(dtype = var_211_shape_cast_fp16_to_uint16_dtype_0, x = var_211_shape_cast_fp16)[name = string("cast_189")];
+            uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_211_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")];
+            string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_188")];
+            tensor<int32, [1]> expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")];
+            tensor<int32, [4]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)];
+            bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_71)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_73_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_73 = read_state(input = v_cache2)[name = string("coreml_update_state_73")];
+            tensor<fp16, [1280, 1280]> var_233_to_fp16 = const()[name = string("op_233_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56371968)))];
+            tensor<fp16, [1, ?, 1280]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_233_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_237_to_fp16 = const()[name = string("op_237_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59648832)))];
+            tensor<fp16, [1280]> var_238_to_fp16 = const()[name = string("op_238_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62925696)))];
+            tensor<fp16, [1, ?, 1280]> linear_7_cast_fp16 = linear(bias = var_238_to_fp16, weight = var_237_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")];
+            tensor<int32, [3]> var_240_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_240_shape_cast_fp16")];
+            int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)];
+            int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)];
+            bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)];
+            string var_240_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_240_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_240_shape_cast_fp16_to_uint16 = cast(dtype = var_240_shape_cast_fp16_to_uint16_dtype_0, x = var_240_shape_cast_fp16)[name = string("cast_187")];
+            uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_240_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")];
+            string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_186")];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")];
+            tensor<int32, [4]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)];
+            bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_72)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_74_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_74 = read_state(input = k_cache2)[name = string("coreml_update_state_74")];
+            tensor<int32, [3]> var_245_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_245_shape_cast_fp16")];
+            int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)];
+            int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)];
+            bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)];
+            string var_245_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_245_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_245_shape_cast_fp16_to_uint16 = cast(dtype = var_245_shape_cast_fp16_to_uint16_dtype_0, x = var_245_shape_cast_fp16)[name = string("cast_185")];
+            uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_245_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")];
+            string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_184")];
+            tensor<int32, [1]> expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")];
+            tensor<int32, [4]> concat_26 = const()[name = string("concat_26"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_73)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_75_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_75 = read_state(input = v_cache2)[name = string("coreml_update_state_75")];
+            tensor<fp16, [1280, 1280]> var_267_to_fp16 = const()[name = string("op_267_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62928320)))];
+            tensor<fp16, [1, ?, 1280]> linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_267_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_271_to_fp16 = const()[name = string("op_271_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66205184)))];
+            tensor<fp16, [1280]> var_272_to_fp16 = const()[name = string("op_272_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69482048)))];
+            tensor<fp16, [1, ?, 1280]> linear_9_cast_fp16 = linear(bias = var_272_to_fp16, weight = var_271_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")];
+            tensor<int32, [3]> var_274_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_274_shape_cast_fp16")];
+            int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)];
+            int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)];
+            bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)];
+            string var_274_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_274_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_274_shape_cast_fp16_to_uint16 = cast(dtype = var_274_shape_cast_fp16_to_uint16_dtype_0, x = var_274_shape_cast_fp16)[name = string("cast_183")];
+            uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_274_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")];
+            string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_182")];
+            tensor<int32, [1]> expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")];
+            tensor<int32, [4]> concat_29 = const()[name = string("concat_29"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)];
+            bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_74)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_76_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_76 = read_state(input = k_cache2)[name = string("coreml_update_state_76")];
+            tensor<int32, [3]> var_279_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_279_shape_cast_fp16")];
+            int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)];
+            int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)];
+            bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)];
+            string var_279_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_279_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_279_shape_cast_fp16_to_uint16 = cast(dtype = var_279_shape_cast_fp16_to_uint16_dtype_0, x = var_279_shape_cast_fp16)[name = string("cast_181")];
+            uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_279_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")];
+            string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_180")];
+            tensor<int32, [1]> expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")];
+            tensor<int32, [4]> concat_32 = const()[name = string("concat_32"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)];
+            bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_75)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_77_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_77 = read_state(input = v_cache2)[name = string("coreml_update_state_77")];
+            tensor<fp16, [1280, 1280]> var_301_to_fp16 = const()[name = string("op_301_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69484672)))];
+            tensor<fp16, [1, ?, 1280]> linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_301_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_305_to_fp16 = const()[name = string("op_305_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72761536)))];
+            tensor<fp16, [1280]> var_306_to_fp16 = const()[name = string("op_306_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76038400)))];
+            tensor<fp16, [1, ?, 1280]> linear_11_cast_fp16 = linear(bias = var_306_to_fp16, weight = var_305_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")];
+            tensor<int32, [3]> var_308_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_308_shape_cast_fp16")];
+            int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)];
+            int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)];
+            bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)];
+            string var_308_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_308_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_308_shape_cast_fp16_to_uint16 = cast(dtype = var_308_shape_cast_fp16_to_uint16_dtype_0, x = var_308_shape_cast_fp16)[name = string("cast_179")];
+            uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_308_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")];
+            string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_178")];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")];
+            tensor<int32, [4]> concat_35 = const()[name = string("concat_35"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)];
+            bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_76)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_78_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_78 = read_state(input = k_cache2)[name = string("coreml_update_state_78")];
+            tensor<int32, [3]> var_313_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_313_shape_cast_fp16")];
+            int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)];
+            int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)];
+            bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)];
+            string var_313_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_313_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_313_shape_cast_fp16_to_uint16 = cast(dtype = var_313_shape_cast_fp16_to_uint16_dtype_0, x = var_313_shape_cast_fp16)[name = string("cast_177")];
+            uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_313_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")];
+            string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_176")];
+            tensor<int32, [1]> expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")];
+            tensor<int32, [4]> concat_38 = const()[name = string("concat_38"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
+            bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_77)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_79_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_79 = read_state(input = v_cache2)[name = string("coreml_update_state_79")];
+            tensor<fp16, [1280, 1280]> var_335_to_fp16 = const()[name = string("op_335_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76041024)))];
+            tensor<fp16, [1, ?, 1280]> linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_335_to_fp16, x = audio_data)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_339_to_fp16 = const()[name = string("op_339_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79317888)))];
+            tensor<fp16, [1280]> var_340_to_fp16 = const()[name = string("op_340_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82594752)))];
+            tensor<fp16, [1, ?, 1280]> linear_13_cast_fp16 = linear(bias = var_340_to_fp16, weight = var_339_to_fp16, x = audio_data)[name = string("linear_13_cast_fp16")];
+            tensor<int32, [3]> var_342_shape_cast_fp16 = shape(x = linear_12_cast_fp16)[name = string("op_342_shape_cast_fp16")];
+            int32 gather_12_axis_0 = const()[name = string("gather_12_axis_0"), val = int32(0)];
+            int32 gather_12_batch_dims_0 = const()[name = string("gather_12_batch_dims_0"), val = int32(0)];
+            bool gather_12_validate_indices_0 = const()[name = string("gather_12_validate_indices_0"), val = bool(false)];
+            string var_342_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_342_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_12_to_uint16 = const()[name = string("select_12_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_342_shape_cast_fp16_to_uint16 = cast(dtype = var_342_shape_cast_fp16_to_uint16_dtype_0, x = var_342_shape_cast_fp16)[name = string("cast_175")];
+            uint16 gather_12_cast_uint16 = gather(axis = gather_12_axis_0, batch_dims = gather_12_batch_dims_0, indices = select_12_to_uint16, validate_indices = gather_12_validate_indices_0, x = var_342_shape_cast_fp16_to_uint16)[name = string("gather_12_cast_uint16")];
+            string gather_12_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_12_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_59_axes_0 = const()[name = string("expand_dims_59_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_12_cast_uint16_to_int32 = cast(dtype = gather_12_cast_uint16_to_int32_dtype_0, x = gather_12_cast_uint16)[name = string("cast_174")];
+            tensor<int32, [1]> expand_dims_59 = expand_dims(axes = expand_dims_59_axes_0, x = gather_12_cast_uint16_to_int32)[name = string("expand_dims_59")];
+            tensor<int32, [4]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [1]> concat_42_values0_0 = const()[name = string("concat_42_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_42_values1_0 = const()[name = string("concat_42_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_42_values3_0 = const()[name = string("concat_42_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)];
+            bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (concat_42_values0_0, concat_42_values1_0, expand_dims_59, concat_42_values3_0))[name = string("concat_42")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_41, begin_mask = k_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_42, end_mask = k_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_7_stride_0, update = linear_12_cast_fp16, x = coreml_update_state_78)[name = string("k_cache2_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_7_cast_fp16, input = k_cache2)[name = string("coreml_update_state_80_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_80 = read_state(input = k_cache2)[name = string("coreml_update_state_80")];
+            tensor<int32, [3]> var_347_shape_cast_fp16 = shape(x = linear_13_cast_fp16)[name = string("op_347_shape_cast_fp16")];
+            int32 gather_13_axis_0 = const()[name = string("gather_13_axis_0"), val = int32(0)];
+            int32 gather_13_batch_dims_0 = const()[name = string("gather_13_batch_dims_0"), val = int32(0)];
+            bool gather_13_validate_indices_0 = const()[name = string("gather_13_validate_indices_0"), val = bool(false)];
+            string var_347_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_347_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_13_to_uint16 = const()[name = string("select_13_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_347_shape_cast_fp16_to_uint16 = cast(dtype = var_347_shape_cast_fp16_to_uint16_dtype_0, x = var_347_shape_cast_fp16)[name = string("cast_173")];
+            uint16 gather_13_cast_uint16 = gather(axis = gather_13_axis_0, batch_dims = gather_13_batch_dims_0, indices = select_13_to_uint16, validate_indices = gather_13_validate_indices_0, x = var_347_shape_cast_fp16_to_uint16)[name = string("gather_13_cast_uint16")];
+            string gather_13_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_13_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_63_axes_0 = const()[name = string("expand_dims_63_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_13_cast_uint16_to_int32 = cast(dtype = gather_13_cast_uint16_to_int32_dtype_0, x = gather_13_cast_uint16)[name = string("cast_172")];
+            tensor<int32, [1]> expand_dims_63 = expand_dims(axes = expand_dims_63_axes_0, x = gather_13_cast_uint16_to_int32)[name = string("expand_dims_63")];
+            tensor<int32, [4]> concat_44 = const()[name = string("concat_44"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [1]> concat_45_values0_0 = const()[name = string("concat_45_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_45_values1_0 = const()[name = string("concat_45_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_45_values3_0 = const()[name = string("concat_45_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_45_axis_0 = const()[name = string("concat_45_axis_0"), val = int32(0)];
+            bool concat_45_interleave_0 = const()[name = string("concat_45_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_45 = concat(axis = concat_45_axis_0, interleave = concat_45_interleave_0, values = (concat_45_values0_0, concat_45_values1_0, expand_dims_63, concat_45_values3_0))[name = string("concat_45")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_44, begin_mask = v_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_45, end_mask = v_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_7_stride_0, update = linear_13_cast_fp16, x = coreml_update_state_79)[name = string("v_cache2_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_7_cast_fp16, input = v_cache2)[name = string("coreml_update_state_81_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_81 = read_state(input = v_cache2)[name = string("coreml_update_state_81")];
+            tensor<fp16, [1280, 1280]> var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82597376)))];
+            tensor<fp16, [1, ?, 1280]> linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_369_to_fp16, x = audio_data)[name = string("linear_14_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_373_to_fp16 = const()[name = string("op_373_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85874240)))];
+            tensor<fp16, [1280]> var_374_to_fp16 = const()[name = string("op_374_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89151104)))];
+            tensor<fp16, [1, ?, 1280]> linear_15_cast_fp16 = linear(bias = var_374_to_fp16, weight = var_373_to_fp16, x = audio_data)[name = string("linear_15_cast_fp16")];
+            tensor<int32, [3]> var_376_shape_cast_fp16 = shape(x = linear_14_cast_fp16)[name = string("op_376_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_376_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_376_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_376_shape_cast_fp16_to_uint16 = cast(dtype = var_376_shape_cast_fp16_to_uint16_dtype_0, x = var_376_shape_cast_fp16)[name = string("cast_171")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_376_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_170")];
+            tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = gather_14_cast_uint16_to_int32)[name = string("expand_dims_67")];
+            tensor<int32, [4]> concat_47 = const()[name = string("concat_47"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_48_values1_0 = const()[name = string("concat_48_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_48_values3_0 = const()[name = string("concat_48_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, concat_48_values1_0, expand_dims_67, concat_48_values3_0))[name = string("concat_48")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_47, begin_mask = k_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_48, end_mask = k_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_8_stride_0, update = linear_14_cast_fp16, x = coreml_update_state_80)[name = string("k_cache2_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_8_cast_fp16, input = k_cache2)[name = string("coreml_update_state_82_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_82 = read_state(input = k_cache2)[name = string("coreml_update_state_82")];
+            tensor<int32, [3]> var_381_shape_cast_fp16 = shape(x = linear_15_cast_fp16)[name = string("op_381_shape_cast_fp16")];
+            int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)];
+            int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)];
+            bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)];
+            string var_381_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_381_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_381_shape_cast_fp16_to_uint16 = cast(dtype = var_381_shape_cast_fp16_to_uint16_dtype_0, x = var_381_shape_cast_fp16)[name = string("cast_169")];
+            uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_381_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")];
+            string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_71_axes_0 = const()[name = string("expand_dims_71_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_168")];
+            tensor<int32, [1]> expand_dims_71 = expand_dims(axes = expand_dims_71_axes_0, x = gather_15_cast_uint16_to_int32)[name = string("expand_dims_71")];
+            tensor<int32, [4]> concat_50 = const()[name = string("concat_50"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [1]> concat_51_values0_0 = const()[name = string("concat_51_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)];
+            bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (concat_51_values0_0, concat_51_values1_0, expand_dims_71, concat_51_values3_0))[name = string("concat_51")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_50, begin_mask = v_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_51, end_mask = v_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_8_stride_0, update = linear_15_cast_fp16, x = coreml_update_state_81)[name = string("v_cache2_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_8_cast_fp16, input = v_cache2)[name = string("coreml_update_state_83_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_83 = read_state(input = v_cache2)[name = string("coreml_update_state_83")];
+            tensor<fp16, [1280, 1280]> var_403_to_fp16 = const()[name = string("op_403_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89153728)))];
+            tensor<fp16, [1, ?, 1280]> linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_403_to_fp16, x = audio_data)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_407_to_fp16 = const()[name = string("op_407_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92430592)))];
+            tensor<fp16, [1280]> var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95707456)))];
+            tensor<fp16, [1, ?, 1280]> linear_17_cast_fp16 = linear(bias = var_408_to_fp16, weight = var_407_to_fp16, x = audio_data)[name = string("linear_17_cast_fp16")];
+            tensor<int32, [3]> var_410_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_410_shape_cast_fp16")];
+            int32 gather_16_axis_0 = const()[name = string("gather_16_axis_0"), val = int32(0)];
+            int32 gather_16_batch_dims_0 = const()[name = string("gather_16_batch_dims_0"), val = int32(0)];
+            bool gather_16_validate_indices_0 = const()[name = string("gather_16_validate_indices_0"), val = bool(false)];
+            string var_410_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_410_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_16_to_uint16 = const()[name = string("select_16_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_410_shape_cast_fp16_to_uint16 = cast(dtype = var_410_shape_cast_fp16_to_uint16_dtype_0, x = var_410_shape_cast_fp16)[name = string("cast_167")];
+            uint16 gather_16_cast_uint16 = gather(axis = gather_16_axis_0, batch_dims = gather_16_batch_dims_0, indices = select_16_to_uint16, validate_indices = gather_16_validate_indices_0, x = var_410_shape_cast_fp16_to_uint16)[name = string("gather_16_cast_uint16")];
+            string gather_16_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_16_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_75_axes_0 = const()[name = string("expand_dims_75_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_16_cast_uint16_to_int32 = cast(dtype = gather_16_cast_uint16_to_int32_dtype_0, x = gather_16_cast_uint16)[name = string("cast_166")];
+            tensor<int32, [1]> expand_dims_75 = expand_dims(axes = expand_dims_75_axes_0, x = gather_16_cast_uint16_to_int32)[name = string("expand_dims_75")];
+            tensor<int32, [4]> concat_53 = const()[name = string("concat_53"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [1]> concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_54_values1_0 = const()[name = string("concat_54_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_54_values3_0 = const()[name = string("concat_54_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, concat_54_values1_0, expand_dims_75, concat_54_values3_0))[name = string("concat_54")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_53, begin_mask = k_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_54, end_mask = k_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_9_stride_0, update = linear_16_cast_fp16, x = coreml_update_state_82)[name = string("k_cache2_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_9_cast_fp16, input = k_cache2)[name = string("coreml_update_state_84_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_84 = read_state(input = k_cache2)[name = string("coreml_update_state_84")];
+            tensor<int32, [3]> var_415_shape_cast_fp16 = shape(x = linear_17_cast_fp16)[name = string("op_415_shape_cast_fp16")];
+            int32 gather_17_axis_0 = const()[name = string("gather_17_axis_0"), val = int32(0)];
+            int32 gather_17_batch_dims_0 = const()[name = string("gather_17_batch_dims_0"), val = int32(0)];
+            bool gather_17_validate_indices_0 = const()[name = string("gather_17_validate_indices_0"), val = bool(false)];
+            string var_415_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_415_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_17_to_uint16 = const()[name = string("select_17_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_415_shape_cast_fp16_to_uint16 = cast(dtype = var_415_shape_cast_fp16_to_uint16_dtype_0, x = var_415_shape_cast_fp16)[name = string("cast_165")];
+            uint16 gather_17_cast_uint16 = gather(axis = gather_17_axis_0, batch_dims = gather_17_batch_dims_0, indices = select_17_to_uint16, validate_indices = gather_17_validate_indices_0, x = var_415_shape_cast_fp16_to_uint16)[name = string("gather_17_cast_uint16")];
+            string gather_17_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_17_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_79_axes_0 = const()[name = string("expand_dims_79_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_17_cast_uint16_to_int32 = cast(dtype = gather_17_cast_uint16_to_int32_dtype_0, x = gather_17_cast_uint16)[name = string("cast_164")];
+            tensor<int32, [1]> expand_dims_79 = expand_dims(axes = expand_dims_79_axes_0, x = gather_17_cast_uint16_to_int32)[name = string("expand_dims_79")];
+            tensor<int32, [4]> concat_56 = const()[name = string("concat_56"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [1]> concat_57_values0_0 = const()[name = string("concat_57_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)];
+            bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (concat_57_values0_0, concat_57_values1_0, expand_dims_79, concat_57_values3_0))[name = string("concat_57")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = v_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = v_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_9_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_83)[name = string("v_cache2_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_9_cast_fp16, input = v_cache2)[name = string("coreml_update_state_85_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_85 = read_state(input = v_cache2)[name = string("coreml_update_state_85")];
+            tensor<fp16, [1280, 1280]> var_437_to_fp16 = const()[name = string("op_437_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95710080)))];
+            tensor<fp16, [1, ?, 1280]> linear_18_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_437_to_fp16, x = audio_data)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_441_to_fp16 = const()[name = string("op_441_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98986944)))];
+            tensor<fp16, [1280]> var_442_to_fp16 = const()[name = string("op_442_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102263808)))];
+            tensor<fp16, [1, ?, 1280]> linear_19_cast_fp16 = linear(bias = var_442_to_fp16, weight = var_441_to_fp16, x = audio_data)[name = string("linear_19_cast_fp16")];
+            tensor<int32, [3]> var_444_shape_cast_fp16 = shape(x = linear_18_cast_fp16)[name = string("op_444_shape_cast_fp16")];
+            int32 gather_18_axis_0 = const()[name = string("gather_18_axis_0"), val = int32(0)];
+            int32 gather_18_batch_dims_0 = const()[name = string("gather_18_batch_dims_0"), val = int32(0)];
+            bool gather_18_validate_indices_0 = const()[name = string("gather_18_validate_indices_0"), val = bool(false)];
+            string var_444_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_444_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_18_to_uint16 = const()[name = string("select_18_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_444_shape_cast_fp16_to_uint16 = cast(dtype = var_444_shape_cast_fp16_to_uint16_dtype_0, x = var_444_shape_cast_fp16)[name = string("cast_163")];
+            uint16 gather_18_cast_uint16 = gather(axis = gather_18_axis_0, batch_dims = gather_18_batch_dims_0, indices = select_18_to_uint16, validate_indices = gather_18_validate_indices_0, x = var_444_shape_cast_fp16_to_uint16)[name = string("gather_18_cast_uint16")];
+            string gather_18_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_18_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_18_cast_uint16_to_int32 = cast(dtype = gather_18_cast_uint16_to_int32_dtype_0, x = gather_18_cast_uint16)[name = string("cast_162")];
+            tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = gather_18_cast_uint16_to_int32)[name = string("expand_dims_83")];
+            tensor<int32, [4]> concat_59 = const()[name = string("concat_59"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [1]> concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, concat_60_values1_0, expand_dims_83, concat_60_values3_0))[name = string("concat_60")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_59, begin_mask = k_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_60, end_mask = k_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_10_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_84)[name = string("k_cache2_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_10_cast_fp16, input = k_cache2)[name = string("coreml_update_state_86_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_86 = read_state(input = k_cache2)[name = string("coreml_update_state_86")];
+            tensor<int32, [3]> var_449_shape_cast_fp16 = shape(x = linear_19_cast_fp16)[name = string("op_449_shape_cast_fp16")];
+            int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)];
+            int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)];
+            bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)];
+            string var_449_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_449_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_449_shape_cast_fp16_to_uint16 = cast(dtype = var_449_shape_cast_fp16_to_uint16_dtype_0, x = var_449_shape_cast_fp16)[name = string("cast_161")];
+            uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_449_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")];
+            string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_87_axes_0 = const()[name = string("expand_dims_87_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_160")];
+            tensor<int32, [1]> expand_dims_87 = expand_dims(axes = expand_dims_87_axes_0, x = gather_19_cast_uint16_to_int32)[name = string("expand_dims_87")];
+            tensor<int32, [4]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [1]> concat_63_values0_0 = const()[name = string("concat_63_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)];
+            bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (concat_63_values0_0, concat_63_values1_0, expand_dims_87, concat_63_values3_0))[name = string("concat_63")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_62, begin_mask = v_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_63, end_mask = v_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_10_stride_0, update = linear_19_cast_fp16, x = coreml_update_state_85)[name = string("v_cache2_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_10_cast_fp16, input = v_cache2)[name = string("coreml_update_state_87_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_87 = read_state(input = v_cache2)[name = string("coreml_update_state_87")];
+            tensor<fp16, [1280, 1280]> var_471_to_fp16 = const()[name = string("op_471_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102266432)))];
+            tensor<fp16, [1, ?, 1280]> linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_471_to_fp16, x = audio_data)[name = string("linear_20_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105543296)))];
+            tensor<fp16, [1280]> var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108820160)))];
+            tensor<fp16, [1, ?, 1280]> linear_21_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = audio_data)[name = string("linear_21_cast_fp16")];
+            tensor<int32, [3]> var_478_shape_cast_fp16 = shape(x = linear_20_cast_fp16)[name = string("op_478_shape_cast_fp16")];
+            int32 gather_20_axis_0 = const()[name = string("gather_20_axis_0"), val = int32(0)];
+            int32 gather_20_batch_dims_0 = const()[name = string("gather_20_batch_dims_0"), val = int32(0)];
+            bool gather_20_validate_indices_0 = const()[name = string("gather_20_validate_indices_0"), val = bool(false)];
+            string var_478_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_478_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_20_to_uint16 = const()[name = string("select_20_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_478_shape_cast_fp16_to_uint16 = cast(dtype = var_478_shape_cast_fp16_to_uint16_dtype_0, x = var_478_shape_cast_fp16)[name = string("cast_159")];
+            uint16 gather_20_cast_uint16 = gather(axis = gather_20_axis_0, batch_dims = gather_20_batch_dims_0, indices = select_20_to_uint16, validate_indices = gather_20_validate_indices_0, x = var_478_shape_cast_fp16_to_uint16)[name = string("gather_20_cast_uint16")];
+            string gather_20_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_20_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_91_axes_0 = const()[name = string("expand_dims_91_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_20_cast_uint16_to_int32 = cast(dtype = gather_20_cast_uint16_to_int32_dtype_0, x = gather_20_cast_uint16)[name = string("cast_158")];
+            tensor<int32, [1]> expand_dims_91 = expand_dims(axes = expand_dims_91_axes_0, x = gather_20_cast_uint16_to_int32)[name = string("expand_dims_91")];
+            tensor<int32, [4]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [1]> concat_66_values0_0 = const()[name = string("concat_66_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_66_values1_0 = const()[name = string("concat_66_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_66_values3_0 = const()[name = string("concat_66_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)];
+            bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (concat_66_values0_0, concat_66_values1_0, expand_dims_91, concat_66_values3_0))[name = string("concat_66")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_65, begin_mask = k_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_66, end_mask = k_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_11_stride_0, update = linear_20_cast_fp16, x = coreml_update_state_86)[name = string("k_cache2_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_11_cast_fp16, input = k_cache2)[name = string("coreml_update_state_88_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_88 = read_state(input = k_cache2)[name = string("coreml_update_state_88")];
+            tensor<int32, [3]> var_483_shape_cast_fp16 = shape(x = linear_21_cast_fp16)[name = string("op_483_shape_cast_fp16")];
+            int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)];
+            int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)];
+            bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)];
+            string var_483_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_483_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_483_shape_cast_fp16_to_uint16 = cast(dtype = var_483_shape_cast_fp16_to_uint16_dtype_0, x = var_483_shape_cast_fp16)[name = string("cast_157")];
+            uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_483_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")];
+            string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_95_axes_0 = const()[name = string("expand_dims_95_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_156")];
+            tensor<int32, [1]> expand_dims_95 = expand_dims(axes = expand_dims_95_axes_0, x = gather_21_cast_uint16_to_int32)[name = string("expand_dims_95")];
+            tensor<int32, [4]> concat_68 = const()[name = string("concat_68"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [1]> concat_69_values0_0 = const()[name = string("concat_69_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_69_values1_0 = const()[name = string("concat_69_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_69_values3_0 = const()[name = string("concat_69_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_69_axis_0 = const()[name = string("concat_69_axis_0"), val = int32(0)];
+            bool concat_69_interleave_0 = const()[name = string("concat_69_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_69 = concat(axis = concat_69_axis_0, interleave = concat_69_interleave_0, values = (concat_69_values0_0, concat_69_values1_0, expand_dims_95, concat_69_values3_0))[name = string("concat_69")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_68, begin_mask = v_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_69, end_mask = v_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_11_stride_0, update = linear_21_cast_fp16, x = coreml_update_state_87)[name = string("v_cache2_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_11_cast_fp16, input = v_cache2)[name = string("coreml_update_state_89_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_89 = read_state(input = v_cache2)[name = string("coreml_update_state_89")];
+            tensor<fp16, [1280, 1280]> var_505_to_fp16 = const()[name = string("op_505_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108822784)))];
+            tensor<fp16, [1, ?, 1280]> linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_505_to_fp16, x = audio_data)[name = string("linear_22_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_509_to_fp16 = const()[name = string("op_509_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112099648)))];
+            tensor<fp16, [1280]> var_510_to_fp16 = const()[name = string("op_510_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115376512)))];
+            tensor<fp16, [1, ?, 1280]> linear_23_cast_fp16 = linear(bias = var_510_to_fp16, weight = var_509_to_fp16, x = audio_data)[name = string("linear_23_cast_fp16")];
+            tensor<int32, [3]> var_512_shape_cast_fp16 = shape(x = linear_22_cast_fp16)[name = string("op_512_shape_cast_fp16")];
+            int32 gather_22_axis_0 = const()[name = string("gather_22_axis_0"), val = int32(0)];
+            int32 gather_22_batch_dims_0 = const()[name = string("gather_22_batch_dims_0"), val = int32(0)];
+            bool gather_22_validate_indices_0 = const()[name = string("gather_22_validate_indices_0"), val = bool(false)];
+            string var_512_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_512_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_22_to_uint16 = const()[name = string("select_22_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_512_shape_cast_fp16_to_uint16 = cast(dtype = var_512_shape_cast_fp16_to_uint16_dtype_0, x = var_512_shape_cast_fp16)[name = string("cast_155")];
+            uint16 gather_22_cast_uint16 = gather(axis = gather_22_axis_0, batch_dims = gather_22_batch_dims_0, indices = select_22_to_uint16, validate_indices = gather_22_validate_indices_0, x = var_512_shape_cast_fp16_to_uint16)[name = string("gather_22_cast_uint16")];
+            string gather_22_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_22_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_22_cast_uint16_to_int32 = cast(dtype = gather_22_cast_uint16_to_int32_dtype_0, x = gather_22_cast_uint16)[name = string("cast_154")];
+            tensor<int32, [1]> expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = gather_22_cast_uint16_to_int32)[name = string("expand_dims_99")];
+            tensor<int32, [4]> concat_71 = const()[name = string("concat_71"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [1]> concat_72_values0_0 = const()[name = string("concat_72_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_72_values1_0 = const()[name = string("concat_72_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_72_values3_0 = const()[name = string("concat_72_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)];
+            bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (concat_72_values0_0, concat_72_values1_0, expand_dims_99, concat_72_values3_0))[name = string("concat_72")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_71, begin_mask = k_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_72, end_mask = k_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_12_stride_0, update = linear_22_cast_fp16, x = coreml_update_state_88)[name = string("k_cache2_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_12_cast_fp16, input = k_cache2)[name = string("coreml_update_state_90_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_90 = read_state(input = k_cache2)[name = string("coreml_update_state_90")];
+            tensor<int32, [3]> var_517_shape_cast_fp16 = shape(x = linear_23_cast_fp16)[name = string("op_517_shape_cast_fp16")];
+            int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)];
+            int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)];
+            bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)];
+            string var_517_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_517_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_517_shape_cast_fp16_to_uint16 = cast(dtype = var_517_shape_cast_fp16_to_uint16_dtype_0, x = var_517_shape_cast_fp16)[name = string("cast_153")];
+            uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_517_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")];
+            string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_103_axes_0 = const()[name = string("expand_dims_103_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_152")];
+            tensor<int32, [1]> expand_dims_103 = expand_dims(axes = expand_dims_103_axes_0, x = gather_23_cast_uint16_to_int32)[name = string("expand_dims_103")];
+            tensor<int32, [4]> concat_74 = const()[name = string("concat_74"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [1]> concat_75_values0_0 = const()[name = string("concat_75_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)];
+            bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (concat_75_values0_0, concat_75_values1_0, expand_dims_103, concat_75_values3_0))[name = string("concat_75")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_74, begin_mask = v_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_75, end_mask = v_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_12_stride_0, update = linear_23_cast_fp16, x = coreml_update_state_89)[name = string("v_cache2_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_12_cast_fp16, input = v_cache2)[name = string("coreml_update_state_91_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_91 = read_state(input = v_cache2)[name = string("coreml_update_state_91")];
+            tensor<fp16, [1280, 1280]> var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115379136)))];
+            tensor<fp16, [1, ?, 1280]> linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_539_to_fp16, x = audio_data)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_543_to_fp16 = const()[name = string("op_543_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118656000)))];
+            tensor<fp16, [1280]> var_544_to_fp16 = const()[name = string("op_544_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121932864)))];
+            tensor<fp16, [1, ?, 1280]> linear_25_cast_fp16 = linear(bias = var_544_to_fp16, weight = var_543_to_fp16, x = audio_data)[name = string("linear_25_cast_fp16")];
+            tensor<int32, [3]> var_546_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_546_shape_cast_fp16")];
+            int32 gather_24_axis_0 = const()[name = string("gather_24_axis_0"), val = int32(0)];
+            int32 gather_24_batch_dims_0 = const()[name = string("gather_24_batch_dims_0"), val = int32(0)];
+            bool gather_24_validate_indices_0 = const()[name = string("gather_24_validate_indices_0"), val = bool(false)];
+            string var_546_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_546_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_24_to_uint16 = const()[name = string("select_24_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_546_shape_cast_fp16_to_uint16 = cast(dtype = var_546_shape_cast_fp16_to_uint16_dtype_0, x = var_546_shape_cast_fp16)[name = string("cast_151")];
+            uint16 gather_24_cast_uint16 = gather(axis = gather_24_axis_0, batch_dims = gather_24_batch_dims_0, indices = select_24_to_uint16, validate_indices = gather_24_validate_indices_0, x = var_546_shape_cast_fp16_to_uint16)[name = string("gather_24_cast_uint16")];
+            string gather_24_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_24_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_107_axes_0 = const()[name = string("expand_dims_107_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_24_cast_uint16_to_int32 = cast(dtype = gather_24_cast_uint16_to_int32_dtype_0, x = gather_24_cast_uint16)[name = string("cast_150")];
+            tensor<int32, [1]> expand_dims_107 = expand_dims(axes = expand_dims_107_axes_0, x = gather_24_cast_uint16_to_int32)[name = string("expand_dims_107")];
+            tensor<int32, [4]> concat_77 = const()[name = string("concat_77"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [1]> concat_78_values0_0 = const()[name = string("concat_78_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_78_values1_0 = const()[name = string("concat_78_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_78_values3_0 = const()[name = string("concat_78_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)];
+            bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (concat_78_values0_0, concat_78_values1_0, expand_dims_107, concat_78_values3_0))[name = string("concat_78")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_77, begin_mask = k_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_78, end_mask = k_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_13_stride_0, update = linear_24_cast_fp16, x = coreml_update_state_90)[name = string("k_cache2_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_13_cast_fp16, input = k_cache2)[name = string("coreml_update_state_92_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_92 = read_state(input = k_cache2)[name = string("coreml_update_state_92")];
+            tensor<int32, [3]> var_551_shape_cast_fp16 = shape(x = linear_25_cast_fp16)[name = string("op_551_shape_cast_fp16")];
+            int32 gather_25_axis_0 = const()[name = string("gather_25_axis_0"), val = int32(0)];
+            int32 gather_25_batch_dims_0 = const()[name = string("gather_25_batch_dims_0"), val = int32(0)];
+            bool gather_25_validate_indices_0 = const()[name = string("gather_25_validate_indices_0"), val = bool(false)];
+            string var_551_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_551_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_25_to_uint16 = const()[name = string("select_25_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_551_shape_cast_fp16_to_uint16 = cast(dtype = var_551_shape_cast_fp16_to_uint16_dtype_0, x = var_551_shape_cast_fp16)[name = string("cast_149")];
+            uint16 gather_25_cast_uint16 = gather(axis = gather_25_axis_0, batch_dims = gather_25_batch_dims_0, indices = select_25_to_uint16, validate_indices = gather_25_validate_indices_0, x = var_551_shape_cast_fp16_to_uint16)[name = string("gather_25_cast_uint16")];
+            string gather_25_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_25_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_111_axes_0 = const()[name = string("expand_dims_111_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_25_cast_uint16_to_int32 = cast(dtype = gather_25_cast_uint16_to_int32_dtype_0, x = gather_25_cast_uint16)[name = string("cast_148")];
+            tensor<int32, [1]> expand_dims_111 = expand_dims(axes = expand_dims_111_axes_0, x = gather_25_cast_uint16_to_int32)[name = string("expand_dims_111")];
+            tensor<int32, [4]> concat_80 = const()[name = string("concat_80"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [1]> concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_81_values3_0 = const()[name = string("concat_81_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
+            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, concat_81_values1_0, expand_dims_111, concat_81_values3_0))[name = string("concat_81")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_80, begin_mask = v_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_81, end_mask = v_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_13_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_91)[name = string("v_cache2_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_13_cast_fp16, input = v_cache2)[name = string("coreml_update_state_93_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_93 = read_state(input = v_cache2)[name = string("coreml_update_state_93")];
+            tensor<fp16, [1280, 1280]> var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121935488)))];
+            tensor<fp16, [1, ?, 1280]> linear_26_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_573_to_fp16, x = audio_data)[name = string("linear_26_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_577_to_fp16 = const()[name = string("op_577_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125212352)))];
+            tensor<fp16, [1280]> var_578_to_fp16 = const()[name = string("op_578_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128489216)))];
+            tensor<fp16, [1, ?, 1280]> linear_27_cast_fp16 = linear(bias = var_578_to_fp16, weight = var_577_to_fp16, x = audio_data)[name = string("linear_27_cast_fp16")];
+            tensor<int32, [3]> var_580_shape_cast_fp16 = shape(x = linear_26_cast_fp16)[name = string("op_580_shape_cast_fp16")];
+            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
+            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
+            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
+            string var_580_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_580_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_580_shape_cast_fp16_to_uint16 = cast(dtype = var_580_shape_cast_fp16_to_uint16_dtype_0, x = var_580_shape_cast_fp16)[name = string("cast_147")];
+            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_580_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
+            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_146")];
+            tensor<int32, [1]> expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = gather_26_cast_uint16_to_int32)[name = string("expand_dims_115")];
+            tensor<int32, [4]> concat_83 = const()[name = string("concat_83"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [1]> concat_84_values0_0 = const()[name = string("concat_84_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)];
+            bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (concat_84_values0_0, concat_84_values1_0, expand_dims_115, concat_84_values3_0))[name = string("concat_84")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_83, begin_mask = k_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_84, end_mask = k_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_14_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_92)[name = string("k_cache2_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_14_cast_fp16, input = k_cache2)[name = string("coreml_update_state_94_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_94 = read_state(input = k_cache2)[name = string("coreml_update_state_94")];
+            tensor<int32, [3]> var_585_shape_cast_fp16 = shape(x = linear_27_cast_fp16)[name = string("op_585_shape_cast_fp16")];
+            int32 gather_27_axis_0 = const()[name = string("gather_27_axis_0"), val = int32(0)];
+            int32 gather_27_batch_dims_0 = const()[name = string("gather_27_batch_dims_0"), val = int32(0)];
+            bool gather_27_validate_indices_0 = const()[name = string("gather_27_validate_indices_0"), val = bool(false)];
+            string var_585_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_585_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_27_to_uint16 = const()[name = string("select_27_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_585_shape_cast_fp16_to_uint16 = cast(dtype = var_585_shape_cast_fp16_to_uint16_dtype_0, x = var_585_shape_cast_fp16)[name = string("cast_145")];
+            uint16 gather_27_cast_uint16 = gather(axis = gather_27_axis_0, batch_dims = gather_27_batch_dims_0, indices = select_27_to_uint16, validate_indices = gather_27_validate_indices_0, x = var_585_shape_cast_fp16_to_uint16)[name = string("gather_27_cast_uint16")];
+            string gather_27_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_27_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_119_axes_0 = const()[name = string("expand_dims_119_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_27_cast_uint16_to_int32 = cast(dtype = gather_27_cast_uint16_to_int32_dtype_0, x = gather_27_cast_uint16)[name = string("cast_144")];
+            tensor<int32, [1]> expand_dims_119 = expand_dims(axes = expand_dims_119_axes_0, x = gather_27_cast_uint16_to_int32)[name = string("expand_dims_119")];
+            tensor<int32, [4]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [1]> concat_87_values0_0 = const()[name = string("concat_87_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)];
+            bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (concat_87_values0_0, concat_87_values1_0, expand_dims_119, concat_87_values3_0))[name = string("concat_87")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_87, end_mask = v_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_14_stride_0, update = linear_27_cast_fp16, x = coreml_update_state_93)[name = string("v_cache2_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_14_cast_fp16, input = v_cache2)[name = string("coreml_update_state_95_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_95 = read_state(input = v_cache2)[name = string("coreml_update_state_95")];
+            tensor<fp16, [1280, 1280]> var_607_to_fp16 = const()[name = string("op_607_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128491840)))];
+            tensor<fp16, [1, ?, 1280]> linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_607_to_fp16, x = audio_data)[name = string("linear_28_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_611_to_fp16 = const()[name = string("op_611_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131768704)))];
+            tensor<fp16, [1280]> var_612_to_fp16 = const()[name = string("op_612_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135045568)))];
+            tensor<fp16, [1, ?, 1280]> linear_29_cast_fp16 = linear(bias = var_612_to_fp16, weight = var_611_to_fp16, x = audio_data)[name = string("linear_29_cast_fp16")];
+            tensor<int32, [3]> var_614_shape_cast_fp16 = shape(x = linear_28_cast_fp16)[name = string("op_614_shape_cast_fp16")];
+            int32 gather_28_axis_0 = const()[name = string("gather_28_axis_0"), val = int32(0)];
+            int32 gather_28_batch_dims_0 = const()[name = string("gather_28_batch_dims_0"), val = int32(0)];
+            bool gather_28_validate_indices_0 = const()[name = string("gather_28_validate_indices_0"), val = bool(false)];
+            string var_614_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_614_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_28_to_uint16 = const()[name = string("select_28_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_614_shape_cast_fp16_to_uint16 = cast(dtype = var_614_shape_cast_fp16_to_uint16_dtype_0, x = var_614_shape_cast_fp16)[name = string("cast_143")];
+            uint16 gather_28_cast_uint16 = gather(axis = gather_28_axis_0, batch_dims = gather_28_batch_dims_0, indices = select_28_to_uint16, validate_indices = gather_28_validate_indices_0, x = var_614_shape_cast_fp16_to_uint16)[name = string("gather_28_cast_uint16")];
+            string gather_28_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_28_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_123_axes_0 = const()[name = string("expand_dims_123_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_28_cast_uint16_to_int32 = cast(dtype = gather_28_cast_uint16_to_int32_dtype_0, x = gather_28_cast_uint16)[name = string("cast_142")];
+            tensor<int32, [1]> expand_dims_123 = expand_dims(axes = expand_dims_123_axes_0, x = gather_28_cast_uint16_to_int32)[name = string("expand_dims_123")];
+            tensor<int32, [4]> concat_89 = const()[name = string("concat_89"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [1]> concat_90_values0_0 = const()[name = string("concat_90_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_90_values1_0 = const()[name = string("concat_90_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_90_values3_0 = const()[name = string("concat_90_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)];
+            bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (concat_90_values0_0, concat_90_values1_0, expand_dims_123, concat_90_values3_0))[name = string("concat_90")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_89, begin_mask = k_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_90, end_mask = k_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_15_stride_0, update = linear_28_cast_fp16, x = coreml_update_state_94)[name = string("k_cache2_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_15_cast_fp16, input = k_cache2)[name = string("coreml_update_state_96_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_96 = read_state(input = k_cache2)[name = string("coreml_update_state_96")];
+            tensor<int32, [3]> var_619_shape_cast_fp16 = shape(x = linear_29_cast_fp16)[name = string("op_619_shape_cast_fp16")];
+            int32 gather_29_axis_0 = const()[name = string("gather_29_axis_0"), val = int32(0)];
+            int32 gather_29_batch_dims_0 = const()[name = string("gather_29_batch_dims_0"), val = int32(0)];
+            bool gather_29_validate_indices_0 = const()[name = string("gather_29_validate_indices_0"), val = bool(false)];
+            string var_619_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_619_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_29_to_uint16 = const()[name = string("select_29_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_619_shape_cast_fp16_to_uint16 = cast(dtype = var_619_shape_cast_fp16_to_uint16_dtype_0, x = var_619_shape_cast_fp16)[name = string("cast_141")];
+            uint16 gather_29_cast_uint16 = gather(axis = gather_29_axis_0, batch_dims = gather_29_batch_dims_0, indices = select_29_to_uint16, validate_indices = gather_29_validate_indices_0, x = var_619_shape_cast_fp16_to_uint16)[name = string("gather_29_cast_uint16")];
+            string gather_29_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_29_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_127_axes_0 = const()[name = string("expand_dims_127_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_29_cast_uint16_to_int32 = cast(dtype = gather_29_cast_uint16_to_int32_dtype_0, x = gather_29_cast_uint16)[name = string("cast_140")];
+            tensor<int32, [1]> expand_dims_127 = expand_dims(axes = expand_dims_127_axes_0, x = gather_29_cast_uint16_to_int32)[name = string("expand_dims_127")];
+            tensor<int32, [4]> concat_92 = const()[name = string("concat_92"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [1]> concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
+            bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_127, concat_93_values3_0))[name = string("concat_93")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_93, end_mask = v_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_15_stride_0, update = linear_29_cast_fp16, x = coreml_update_state_95)[name = string("v_cache2_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_15_cast_fp16, input = v_cache2)[name = string("coreml_update_state_97_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_97 = read_state(input = v_cache2)[name = string("coreml_update_state_97")];
+            tensor<fp16, [1280, 1280]> var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135048192)))];
+            tensor<fp16, [1, ?, 1280]> linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_641_to_fp16, x = audio_data)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_645_to_fp16 = const()[name = string("op_645_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138325056)))];
+            tensor<fp16, [1280]> var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141601920)))];
+            tensor<fp16, [1, ?, 1280]> linear_31_cast_fp16 = linear(bias = var_646_to_fp16, weight = var_645_to_fp16, x = audio_data)[name = string("linear_31_cast_fp16")];
+            tensor<int32, [3]> var_648_shape_cast_fp16 = shape(x = linear_30_cast_fp16)[name = string("op_648_shape_cast_fp16")];
+            int32 gather_30_axis_0 = const()[name = string("gather_30_axis_0"), val = int32(0)];
+            int32 gather_30_batch_dims_0 = const()[name = string("gather_30_batch_dims_0"), val = int32(0)];
+            bool gather_30_validate_indices_0 = const()[name = string("gather_30_validate_indices_0"), val = bool(false)];
+            string var_648_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_648_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_30_to_uint16 = const()[name = string("select_30_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_648_shape_cast_fp16_to_uint16 = cast(dtype = var_648_shape_cast_fp16_to_uint16_dtype_0, x = var_648_shape_cast_fp16)[name = string("cast_139")];
+            uint16 gather_30_cast_uint16 = gather(axis = gather_30_axis_0, batch_dims = gather_30_batch_dims_0, indices = select_30_to_uint16, validate_indices = gather_30_validate_indices_0, x = var_648_shape_cast_fp16_to_uint16)[name = string("gather_30_cast_uint16")];
+            string gather_30_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_30_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_30_cast_uint16_to_int32 = cast(dtype = gather_30_cast_uint16_to_int32_dtype_0, x = gather_30_cast_uint16)[name = string("cast_138")];
+            tensor<int32, [1]> expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = gather_30_cast_uint16_to_int32)[name = string("expand_dims_131")];
+            tensor<int32, [4]> concat_95 = const()[name = string("concat_95"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [1]> concat_96_values0_0 = const()[name = string("concat_96_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_96_values1_0 = const()[name = string("concat_96_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_96_values3_0 = const()[name = string("concat_96_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)];
+            bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (concat_96_values0_0, concat_96_values1_0, expand_dims_131, concat_96_values3_0))[name = string("concat_96")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_95, begin_mask = k_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_96, end_mask = k_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_16_stride_0, update = linear_30_cast_fp16, x = coreml_update_state_96)[name = string("k_cache2_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_16_cast_fp16, input = k_cache2)[name = string("coreml_update_state_98_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_98 = read_state(input = k_cache2)[name = string("coreml_update_state_98")];
+            tensor<int32, [3]> var_653_shape_cast_fp16 = shape(x = linear_31_cast_fp16)[name = string("op_653_shape_cast_fp16")];
+            int32 gather_31_axis_0 = const()[name = string("gather_31_axis_0"), val = int32(0)];
+            int32 gather_31_batch_dims_0 = const()[name = string("gather_31_batch_dims_0"), val = int32(0)];
+            bool gather_31_validate_indices_0 = const()[name = string("gather_31_validate_indices_0"), val = bool(false)];
+            string var_653_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_653_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_31_to_uint16 = const()[name = string("select_31_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_653_shape_cast_fp16_to_uint16 = cast(dtype = var_653_shape_cast_fp16_to_uint16_dtype_0, x = var_653_shape_cast_fp16)[name = string("cast_137")];
+            uint16 gather_31_cast_uint16 = gather(axis = gather_31_axis_0, batch_dims = gather_31_batch_dims_0, indices = select_31_to_uint16, validate_indices = gather_31_validate_indices_0, x = var_653_shape_cast_fp16_to_uint16)[name = string("gather_31_cast_uint16")];
+            string gather_31_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_31_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_135_axes_0 = const()[name = string("expand_dims_135_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_31_cast_uint16_to_int32 = cast(dtype = gather_31_cast_uint16_to_int32_dtype_0, x = gather_31_cast_uint16)[name = string("cast_136")];
+            tensor<int32, [1]> expand_dims_135 = expand_dims(axes = expand_dims_135_axes_0, x = gather_31_cast_uint16_to_int32)[name = string("expand_dims_135")];
+            tensor<int32, [4]> concat_98 = const()[name = string("concat_98"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [1]> concat_99_values0_0 = const()[name = string("concat_99_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)];
+            bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (concat_99_values0_0, concat_99_values1_0, expand_dims_135, concat_99_values3_0))[name = string("concat_99")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_98, begin_mask = v_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_99, end_mask = v_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_16_stride_0, update = linear_31_cast_fp16, x = coreml_update_state_97)[name = string("v_cache2_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_16_cast_fp16, input = v_cache2)[name = string("coreml_update_state_99_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_99 = read_state(input = v_cache2)[name = string("coreml_update_state_99")];
+            tensor<fp16, [1280, 1280]> var_675_to_fp16 = const()[name = string("op_675_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141604544)))];
+            tensor<fp16, [1, ?, 1280]> linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_675_to_fp16, x = audio_data)[name = string("linear_32_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_679_to_fp16 = const()[name = string("op_679_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144881408)))];
+            tensor<fp16, [1280]> var_680_to_fp16 = const()[name = string("op_680_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148158272)))];
+            tensor<fp16, [1, ?, 1280]> linear_33_cast_fp16 = linear(bias = var_680_to_fp16, weight = var_679_to_fp16, x = audio_data)[name = string("linear_33_cast_fp16")];
+            tensor<int32, [3]> var_682_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_682_shape_cast_fp16")];
+            int32 gather_32_axis_0 = const()[name = string("gather_32_axis_0"), val = int32(0)];
+            int32 gather_32_batch_dims_0 = const()[name = string("gather_32_batch_dims_0"), val = int32(0)];
+            bool gather_32_validate_indices_0 = const()[name = string("gather_32_validate_indices_0"), val = bool(false)];
+            string var_682_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_682_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_32_to_uint16 = const()[name = string("select_32_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_682_shape_cast_fp16_to_uint16 = cast(dtype = var_682_shape_cast_fp16_to_uint16_dtype_0, x = var_682_shape_cast_fp16)[name = string("cast_135")];
+            uint16 gather_32_cast_uint16 = gather(axis = gather_32_axis_0, batch_dims = gather_32_batch_dims_0, indices = select_32_to_uint16, validate_indices = gather_32_validate_indices_0, x = var_682_shape_cast_fp16_to_uint16)[name = string("gather_32_cast_uint16")];
+            string gather_32_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_32_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_139_axes_0 = const()[name = string("expand_dims_139_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_32_cast_uint16_to_int32 = cast(dtype = gather_32_cast_uint16_to_int32_dtype_0, x = gather_32_cast_uint16)[name = string("cast_134")];
+            tensor<int32, [1]> expand_dims_139 = expand_dims(axes = expand_dims_139_axes_0, x = gather_32_cast_uint16_to_int32)[name = string("expand_dims_139")];
+            tensor<int32, [4]> concat_101 = const()[name = string("concat_101"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [1]> concat_102_values0_0 = const()[name = string("concat_102_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_102_values1_0 = const()[name = string("concat_102_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_102_values3_0 = const()[name = string("concat_102_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)];
+            bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (concat_102_values0_0, concat_102_values1_0, expand_dims_139, concat_102_values3_0))[name = string("concat_102")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_101, begin_mask = k_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_102, end_mask = k_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_17_stride_0, update = linear_32_cast_fp16, x = coreml_update_state_98)[name = string("k_cache2_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_17_cast_fp16, input = k_cache2)[name = string("coreml_update_state_100_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_100 = read_state(input = k_cache2)[name = string("coreml_update_state_100")];
+            tensor<int32, [3]> var_687_shape_cast_fp16 = shape(x = linear_33_cast_fp16)[name = string("op_687_shape_cast_fp16")];
+            int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)];
+            int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)];
+            bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)];
+            string var_687_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_687_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_687_shape_cast_fp16_to_uint16 = cast(dtype = var_687_shape_cast_fp16_to_uint16_dtype_0, x = var_687_shape_cast_fp16)[name = string("cast_133")];
+            uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_687_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")];
+            string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_143_axes_0 = const()[name = string("expand_dims_143_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_132")];
+            tensor<int32, [1]> expand_dims_143 = expand_dims(axes = expand_dims_143_axes_0, x = gather_33_cast_uint16_to_int32)[name = string("expand_dims_143")];
+            tensor<int32, [4]> concat_104 = const()[name = string("concat_104"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [1]> concat_105_values0_0 = const()[name = string("concat_105_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_105_values1_0 = const()[name = string("concat_105_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_105_values3_0 = const()[name = string("concat_105_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_105_axis_0 = const()[name = string("concat_105_axis_0"), val = int32(0)];
+            bool concat_105_interleave_0 = const()[name = string("concat_105_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_105 = concat(axis = concat_105_axis_0, interleave = concat_105_interleave_0, values = (concat_105_values0_0, concat_105_values1_0, expand_dims_143, concat_105_values3_0))[name = string("concat_105")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_104, begin_mask = v_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_105, end_mask = v_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_17_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_99)[name = string("v_cache2_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_17_cast_fp16, input = v_cache2)[name = string("coreml_update_state_101_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_101 = read_state(input = v_cache2)[name = string("coreml_update_state_101")];
+            tensor<fp16, [1280, 1280]> var_709_to_fp16 = const()[name = string("op_709_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148160896)))];
+            tensor<fp16, [1, ?, 1280]> linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_709_to_fp16, x = audio_data)[name = string("linear_34_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_713_to_fp16 = const()[name = string("op_713_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151437760)))];
+            tensor<fp16, [1280]> var_714_to_fp16 = const()[name = string("op_714_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154714624)))];
+            tensor<fp16, [1, ?, 1280]> linear_35_cast_fp16 = linear(bias = var_714_to_fp16, weight = var_713_to_fp16, x = audio_data)[name = string("linear_35_cast_fp16")];
+            tensor<int32, [3]> var_716_shape_cast_fp16 = shape(x = linear_34_cast_fp16)[name = string("op_716_shape_cast_fp16")];
+            int32 gather_34_axis_0 = const()[name = string("gather_34_axis_0"), val = int32(0)];
+            int32 gather_34_batch_dims_0 = const()[name = string("gather_34_batch_dims_0"), val = int32(0)];
+            bool gather_34_validate_indices_0 = const()[name = string("gather_34_validate_indices_0"), val = bool(false)];
+            string var_716_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_716_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_34_to_uint16 = const()[name = string("select_34_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_716_shape_cast_fp16_to_uint16 = cast(dtype = var_716_shape_cast_fp16_to_uint16_dtype_0, x = var_716_shape_cast_fp16)[name = string("cast_131")];
+            uint16 gather_34_cast_uint16 = gather(axis = gather_34_axis_0, batch_dims = gather_34_batch_dims_0, indices = select_34_to_uint16, validate_indices = gather_34_validate_indices_0, x = var_716_shape_cast_fp16_to_uint16)[name = string("gather_34_cast_uint16")];
+            string gather_34_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_34_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_34_cast_uint16_to_int32 = cast(dtype = gather_34_cast_uint16_to_int32_dtype_0, x = gather_34_cast_uint16)[name = string("cast_130")];
+            tensor<int32, [1]> expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = gather_34_cast_uint16_to_int32)[name = string("expand_dims_147")];
+            tensor<int32, [4]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [1]> concat_108_values0_0 = const()[name = string("concat_108_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)];
+            bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (concat_108_values0_0, concat_108_values1_0, expand_dims_147, concat_108_values3_0))[name = string("concat_108")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_107, begin_mask = k_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_108, end_mask = k_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_18_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_100)[name = string("k_cache2_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_18_cast_fp16, input = k_cache2)[name = string("coreml_update_state_102_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_102 = read_state(input = k_cache2)[name = string("coreml_update_state_102")];
+            tensor<int32, [3]> var_721_shape_cast_fp16 = shape(x = linear_35_cast_fp16)[name = string("op_721_shape_cast_fp16")];
+            int32 gather_35_axis_0 = const()[name = string("gather_35_axis_0"), val = int32(0)];
+            int32 gather_35_batch_dims_0 = const()[name = string("gather_35_batch_dims_0"), val = int32(0)];
+            bool gather_35_validate_indices_0 = const()[name = string("gather_35_validate_indices_0"), val = bool(false)];
+            string var_721_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_721_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_35_to_uint16 = const()[name = string("select_35_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_721_shape_cast_fp16_to_uint16 = cast(dtype = var_721_shape_cast_fp16_to_uint16_dtype_0, x = var_721_shape_cast_fp16)[name = string("cast_129")];
+            uint16 gather_35_cast_uint16 = gather(axis = gather_35_axis_0, batch_dims = gather_35_batch_dims_0, indices = select_35_to_uint16, validate_indices = gather_35_validate_indices_0, x = var_721_shape_cast_fp16_to_uint16)[name = string("gather_35_cast_uint16")];
+            string gather_35_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_35_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_151_axes_0 = const()[name = string("expand_dims_151_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_35_cast_uint16_to_int32 = cast(dtype = gather_35_cast_uint16_to_int32_dtype_0, x = gather_35_cast_uint16)[name = string("cast_128")];
+            tensor<int32, [1]> expand_dims_151 = expand_dims(axes = expand_dims_151_axes_0, x = gather_35_cast_uint16_to_int32)[name = string("expand_dims_151")];
+            tensor<int32, [4]> concat_110 = const()[name = string("concat_110"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [1]> concat_111_values0_0 = const()[name = string("concat_111_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)];
+            bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (concat_111_values0_0, concat_111_values1_0, expand_dims_151, concat_111_values3_0))[name = string("concat_111")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_110, begin_mask = v_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_111, end_mask = v_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_18_stride_0, update = linear_35_cast_fp16, x = coreml_update_state_101)[name = string("v_cache2_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_18_cast_fp16, input = v_cache2)[name = string("coreml_update_state_103_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_103 = read_state(input = v_cache2)[name = string("coreml_update_state_103")];
+            tensor<fp16, [1280, 1280]> var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154717248)))];
+            tensor<fp16, [1, ?, 1280]> linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_743_to_fp16, x = audio_data)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157994112)))];
+            tensor<fp16, [1280]> var_748_to_fp16 = const()[name = string("op_748_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161270976)))];
+            tensor<fp16, [1, ?, 1280]> linear_37_cast_fp16 = linear(bias = var_748_to_fp16, weight = var_747_to_fp16, x = audio_data)[name = string("linear_37_cast_fp16")];
+            tensor<int32, [3]> var_750_shape_cast_fp16 = shape(x = linear_36_cast_fp16)[name = string("op_750_shape_cast_fp16")];
+            int32 gather_36_axis_0 = const()[name = string("gather_36_axis_0"), val = int32(0)];
+            int32 gather_36_batch_dims_0 = const()[name = string("gather_36_batch_dims_0"), val = int32(0)];
+            bool gather_36_validate_indices_0 = const()[name = string("gather_36_validate_indices_0"), val = bool(false)];
+            string var_750_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_750_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_36_to_uint16 = const()[name = string("select_36_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_750_shape_cast_fp16_to_uint16 = cast(dtype = var_750_shape_cast_fp16_to_uint16_dtype_0, x = var_750_shape_cast_fp16)[name = string("cast_127")];
+            uint16 gather_36_cast_uint16 = gather(axis = gather_36_axis_0, batch_dims = gather_36_batch_dims_0, indices = select_36_to_uint16, validate_indices = gather_36_validate_indices_0, x = var_750_shape_cast_fp16_to_uint16)[name = string("gather_36_cast_uint16")];
+            string gather_36_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_36_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_155_axes_0 = const()[name = string("expand_dims_155_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_36_cast_uint16_to_int32 = cast(dtype = gather_36_cast_uint16_to_int32_dtype_0, x = gather_36_cast_uint16)[name = string("cast_126")];
+            tensor<int32, [1]> expand_dims_155 = expand_dims(axes = expand_dims_155_axes_0, x = gather_36_cast_uint16_to_int32)[name = string("expand_dims_155")];
+            tensor<int32, [4]> concat_113 = const()[name = string("concat_113"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [1]> concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_114_values1_0 = const()[name = string("concat_114_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_114_values3_0 = const()[name = string("concat_114_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
+            bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, concat_114_values1_0, expand_dims_155, concat_114_values3_0))[name = string("concat_114")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_113, begin_mask = k_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_114, end_mask = k_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_19_stride_0, update = linear_36_cast_fp16, x = coreml_update_state_102)[name = string("k_cache2_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_19_cast_fp16, input = k_cache2)[name = string("coreml_update_state_104_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_104 = read_state(input = k_cache2)[name = string("coreml_update_state_104")];
+            tensor<int32, [3]> var_755_shape_cast_fp16 = shape(x = linear_37_cast_fp16)[name = string("op_755_shape_cast_fp16")];
+            int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)];
+            int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)];
+            bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)];
+            string var_755_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_755_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_755_shape_cast_fp16_to_uint16 = cast(dtype = var_755_shape_cast_fp16_to_uint16_dtype_0, x = var_755_shape_cast_fp16)[name = string("cast_125")];
+            uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_755_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")];
+            string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_159_axes_0 = const()[name = string("expand_dims_159_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_124")];
+            tensor<int32, [1]> expand_dims_159 = expand_dims(axes = expand_dims_159_axes_0, x = gather_37_cast_uint16_to_int32)[name = string("expand_dims_159")];
+            tensor<int32, [4]> concat_116 = const()[name = string("concat_116"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [1]> concat_117_values0_0 = const()[name = string("concat_117_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_117_values1_0 = const()[name = string("concat_117_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_117_values3_0 = const()[name = string("concat_117_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_117_axis_0 = const()[name = string("concat_117_axis_0"), val = int32(0)];
+            bool concat_117_interleave_0 = const()[name = string("concat_117_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_117 = concat(axis = concat_117_axis_0, interleave = concat_117_interleave_0, values = (concat_117_values0_0, concat_117_values1_0, expand_dims_159, concat_117_values3_0))[name = string("concat_117")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_116, begin_mask = v_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_117, end_mask = v_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_19_stride_0, update = linear_37_cast_fp16, x = coreml_update_state_103)[name = string("v_cache2_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_19_cast_fp16, input = v_cache2)[name = string("coreml_update_state_105_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_105 = read_state(input = v_cache2)[name = string("coreml_update_state_105")];
+            tensor<fp16, [1280, 1280]> var_777_to_fp16 = const()[name = string("op_777_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161273600)))];
+            tensor<fp16, [1, ?, 1280]> linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_777_to_fp16, x = audio_data)[name = string("linear_38_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_781_to_fp16 = const()[name = string("op_781_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164550464)))];
+            tensor<fp16, [1280]> var_782_to_fp16 = const()[name = string("op_782_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167827328)))];
+            tensor<fp16, [1, ?, 1280]> linear_39_cast_fp16 = linear(bias = var_782_to_fp16, weight = var_781_to_fp16, x = audio_data)[name = string("linear_39_cast_fp16")];
+            tensor<int32, [3]> var_784_shape_cast_fp16 = shape(x = linear_38_cast_fp16)[name = string("op_784_shape_cast_fp16")];
+            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
+            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
+            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
+            string var_784_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_784_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_784_shape_cast_fp16_to_uint16 = cast(dtype = var_784_shape_cast_fp16_to_uint16_dtype_0, x = var_784_shape_cast_fp16)[name = string("cast_123")];
+            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_784_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
+            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_122")];
+            tensor<int32, [1]> expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = gather_38_cast_uint16_to_int32)[name = string("expand_dims_163")];
+            tensor<int32, [4]> concat_119 = const()[name = string("concat_119"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [1]> concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_120_values1_0 = const()[name = string("concat_120_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_120_values3_0 = const()[name = string("concat_120_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)];
+            bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, concat_120_values1_0, expand_dims_163, concat_120_values3_0))[name = string("concat_120")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_119, begin_mask = k_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_120, end_mask = k_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_20_stride_0, update = linear_38_cast_fp16, x = coreml_update_state_104)[name = string("k_cache2_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_20_cast_fp16, input = k_cache2)[name = string("coreml_update_state_106_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_106 = read_state(input = k_cache2)[name = string("coreml_update_state_106")];
+            tensor<int32, [3]> var_789_shape_cast_fp16 = shape(x = linear_39_cast_fp16)[name = string("op_789_shape_cast_fp16")];
+            int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)];
+            int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)];
+            bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)];
+            string var_789_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_789_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_789_shape_cast_fp16_to_uint16 = cast(dtype = var_789_shape_cast_fp16_to_uint16_dtype_0, x = var_789_shape_cast_fp16)[name = string("cast_121")];
+            uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_789_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")];
+            string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_167_axes_0 = const()[name = string("expand_dims_167_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_120")];
+            tensor<int32, [1]> expand_dims_167 = expand_dims(axes = expand_dims_167_axes_0, x = gather_39_cast_uint16_to_int32)[name = string("expand_dims_167")];
+            tensor<int32, [4]> concat_122 = const()[name = string("concat_122"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [1]> concat_123_values0_0 = const()[name = string("concat_123_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)];
+            bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (concat_123_values0_0, concat_123_values1_0, expand_dims_167, concat_123_values3_0))[name = string("concat_123")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_122, begin_mask = v_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_123, end_mask = v_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_20_stride_0, update = linear_39_cast_fp16, x = coreml_update_state_105)[name = string("v_cache2_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_20_cast_fp16, input = v_cache2)[name = string("coreml_update_state_107_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_107 = read_state(input = v_cache2)[name = string("coreml_update_state_107")];
+            tensor<fp16, [1280, 1280]> var_811_to_fp16 = const()[name = string("op_811_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167829952)))];
+            tensor<fp16, [1, ?, 1280]> linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_811_to_fp16, x = audio_data)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_815_to_fp16 = const()[name = string("op_815_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171106816)))];
+            tensor<fp16, [1280]> var_816_to_fp16 = const()[name = string("op_816_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174383680)))];
+            tensor<fp16, [1, ?, 1280]> linear_41_cast_fp16 = linear(bias = var_816_to_fp16, weight = var_815_to_fp16, x = audio_data)[name = string("linear_41_cast_fp16")];
+            tensor<int32, [3]> var_818_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_818_shape_cast_fp16")];
+            int32 gather_40_axis_0 = const()[name = string("gather_40_axis_0"), val = int32(0)];
+            int32 gather_40_batch_dims_0 = const()[name = string("gather_40_batch_dims_0"), val = int32(0)];
+            bool gather_40_validate_indices_0 = const()[name = string("gather_40_validate_indices_0"), val = bool(false)];
+            string var_818_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_818_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_40_to_uint16 = const()[name = string("select_40_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_818_shape_cast_fp16_to_uint16 = cast(dtype = var_818_shape_cast_fp16_to_uint16_dtype_0, x = var_818_shape_cast_fp16)[name = string("cast_119")];
+            uint16 gather_40_cast_uint16 = gather(axis = gather_40_axis_0, batch_dims = gather_40_batch_dims_0, indices = select_40_to_uint16, validate_indices = gather_40_validate_indices_0, x = var_818_shape_cast_fp16_to_uint16)[name = string("gather_40_cast_uint16")];
+            string gather_40_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_40_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_171_axes_0 = const()[name = string("expand_dims_171_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_40_cast_uint16_to_int32 = cast(dtype = gather_40_cast_uint16_to_int32_dtype_0, x = gather_40_cast_uint16)[name = string("cast_118")];
+            tensor<int32, [1]> expand_dims_171 = expand_dims(axes = expand_dims_171_axes_0, x = gather_40_cast_uint16_to_int32)[name = string("expand_dims_171")];
+            tensor<int32, [4]> concat_125 = const()[name = string("concat_125"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [1]> concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_126_values1_0 = const()[name = string("concat_126_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_126_values3_0 = const()[name = string("concat_126_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)];
+            bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, concat_126_values1_0, expand_dims_171, concat_126_values3_0))[name = string("concat_126")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_125, begin_mask = k_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_126, end_mask = k_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_21_stride_0, update = linear_40_cast_fp16, x = coreml_update_state_106)[name = string("k_cache2_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_21_cast_fp16, input = k_cache2)[name = string("coreml_update_state_108_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_108 = read_state(input = k_cache2)[name = string("coreml_update_state_108")];
+            tensor<int32, [3]> var_823_shape_cast_fp16 = shape(x = linear_41_cast_fp16)[name = string("op_823_shape_cast_fp16")];
+            int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)];
+            int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)];
+            bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)];
+            string var_823_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_823_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_823_shape_cast_fp16_to_uint16 = cast(dtype = var_823_shape_cast_fp16_to_uint16_dtype_0, x = var_823_shape_cast_fp16)[name = string("cast_117")];
+            uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_823_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")];
+            string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_175_axes_0 = const()[name = string("expand_dims_175_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_116")];
+            tensor<int32, [1]> expand_dims_175 = expand_dims(axes = expand_dims_175_axes_0, x = gather_41_cast_uint16_to_int32)[name = string("expand_dims_175")];
+            tensor<int32, [4]> concat_128 = const()[name = string("concat_128"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [1]> concat_129_values0_0 = const()[name = string("concat_129_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)];
+            bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (concat_129_values0_0, concat_129_values1_0, expand_dims_175, concat_129_values3_0))[name = string("concat_129")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_128, begin_mask = v_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_129, end_mask = v_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_21_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_107)[name = string("v_cache2_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_21_cast_fp16, input = v_cache2)[name = string("coreml_update_state_109_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_109 = read_state(input = v_cache2)[name = string("coreml_update_state_109")];
+            tensor<fp16, [1280, 1280]> var_845_to_fp16 = const()[name = string("op_845_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174386304)))];
+            tensor<fp16, [1, ?, 1280]> linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_845_to_fp16, x = audio_data)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177663168)))];
+            tensor<fp16, [1280]> var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180940032)))];
+            tensor<fp16, [1, ?, 1280]> linear_43_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = audio_data)[name = string("linear_43_cast_fp16")];
+            tensor<int32, [3]> var_852_shape_cast_fp16 = shape(x = linear_42_cast_fp16)[name = string("op_852_shape_cast_fp16")];
+            int32 gather_42_axis_0 = const()[name = string("gather_42_axis_0"), val = int32(0)];
+            int32 gather_42_batch_dims_0 = const()[name = string("gather_42_batch_dims_0"), val = int32(0)];
+            bool gather_42_validate_indices_0 = const()[name = string("gather_42_validate_indices_0"), val = bool(false)];
+            string var_852_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_852_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_42_to_uint16 = const()[name = string("select_42_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_852_shape_cast_fp16_to_uint16 = cast(dtype = var_852_shape_cast_fp16_to_uint16_dtype_0, x = var_852_shape_cast_fp16)[name = string("cast_115")];
+            uint16 gather_42_cast_uint16 = gather(axis = gather_42_axis_0, batch_dims = gather_42_batch_dims_0, indices = select_42_to_uint16, validate_indices = gather_42_validate_indices_0, x = var_852_shape_cast_fp16_to_uint16)[name = string("gather_42_cast_uint16")];
+            string gather_42_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_42_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_42_cast_uint16_to_int32 = cast(dtype = gather_42_cast_uint16_to_int32_dtype_0, x = gather_42_cast_uint16)[name = string("cast_114")];
+            tensor<int32, [1]> expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = gather_42_cast_uint16_to_int32)[name = string("expand_dims_179")];
+            tensor<int32, [4]> concat_131 = const()[name = string("concat_131"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [1]> concat_132_values0_0 = const()[name = string("concat_132_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)];
+            bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (concat_132_values0_0, concat_132_values1_0, expand_dims_179, concat_132_values3_0))[name = string("concat_132")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_131, begin_mask = k_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_132, end_mask = k_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_22_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_108)[name = string("k_cache2_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_22_cast_fp16, input = k_cache2)[name = string("coreml_update_state_110_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_110 = read_state(input = k_cache2)[name = string("coreml_update_state_110")];
+            tensor<int32, [3]> var_857_shape_cast_fp16 = shape(x = linear_43_cast_fp16)[name = string("op_857_shape_cast_fp16")];
+            int32 gather_43_axis_0 = const()[name = string("gather_43_axis_0"), val = int32(0)];
+            int32 gather_43_batch_dims_0 = const()[name = string("gather_43_batch_dims_0"), val = int32(0)];
+            bool gather_43_validate_indices_0 = const()[name = string("gather_43_validate_indices_0"), val = bool(false)];
+            string var_857_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_857_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_43_to_uint16 = const()[name = string("select_43_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_857_shape_cast_fp16_to_uint16 = cast(dtype = var_857_shape_cast_fp16_to_uint16_dtype_0, x = var_857_shape_cast_fp16)[name = string("cast_113")];
+            uint16 gather_43_cast_uint16 = gather(axis = gather_43_axis_0, batch_dims = gather_43_batch_dims_0, indices = select_43_to_uint16, validate_indices = gather_43_validate_indices_0, x = var_857_shape_cast_fp16_to_uint16)[name = string("gather_43_cast_uint16")];
+            string gather_43_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_43_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_183_axes_0 = const()[name = string("expand_dims_183_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_43_cast_uint16_to_int32 = cast(dtype = gather_43_cast_uint16_to_int32_dtype_0, x = gather_43_cast_uint16)[name = string("cast_112")];
+            tensor<int32, [1]> expand_dims_183 = expand_dims(axes = expand_dims_183_axes_0, x = gather_43_cast_uint16_to_int32)[name = string("expand_dims_183")];
+            tensor<int32, [4]> concat_134 = const()[name = string("concat_134"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [1]> concat_135_values0_0 = const()[name = string("concat_135_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)];
+            bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (concat_135_values0_0, concat_135_values1_0, expand_dims_183, concat_135_values3_0))[name = string("concat_135")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_134, begin_mask = v_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_135, end_mask = v_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_22_stride_0, update = linear_43_cast_fp16, x = coreml_update_state_109)[name = string("v_cache2_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_22_cast_fp16, input = v_cache2)[name = string("coreml_update_state_111_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_111 = read_state(input = v_cache2)[name = string("coreml_update_state_111")];
+            tensor<fp16, [1280, 1280]> var_879_to_fp16 = const()[name = string("op_879_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180942656)))];
+            tensor<fp16, [1, ?, 1280]> linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_879_to_fp16, x = audio_data)[name = string("linear_44_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_883_to_fp16 = const()[name = string("op_883_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184219520)))];
+            tensor<fp16, [1280]> var_884_to_fp16 = const()[name = string("op_884_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187496384)))];
+            tensor<fp16, [1, ?, 1280]> linear_45_cast_fp16 = linear(bias = var_884_to_fp16, weight = var_883_to_fp16, x = audio_data)[name = string("linear_45_cast_fp16")];
+            tensor<int32, [3]> var_886_shape_cast_fp16 = shape(x = linear_44_cast_fp16)[name = string("op_886_shape_cast_fp16")];
+            int32 gather_44_axis_0 = const()[name = string("gather_44_axis_0"), val = int32(0)];
+            int32 gather_44_batch_dims_0 = const()[name = string("gather_44_batch_dims_0"), val = int32(0)];
+            bool gather_44_validate_indices_0 = const()[name = string("gather_44_validate_indices_0"), val = bool(false)];
+            string var_886_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_886_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_44_to_uint16 = const()[name = string("select_44_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_886_shape_cast_fp16_to_uint16 = cast(dtype = var_886_shape_cast_fp16_to_uint16_dtype_0, x = var_886_shape_cast_fp16)[name = string("cast_111")];
+            uint16 gather_44_cast_uint16 = gather(axis = gather_44_axis_0, batch_dims = gather_44_batch_dims_0, indices = select_44_to_uint16, validate_indices = gather_44_validate_indices_0, x = var_886_shape_cast_fp16_to_uint16)[name = string("gather_44_cast_uint16")];
+            string gather_44_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_44_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_187_axes_0 = const()[name = string("expand_dims_187_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_44_cast_uint16_to_int32 = cast(dtype = gather_44_cast_uint16_to_int32_dtype_0, x = gather_44_cast_uint16)[name = string("cast_110")];
+            tensor<int32, [1]> expand_dims_187 = expand_dims(axes = expand_dims_187_axes_0, x = gather_44_cast_uint16_to_int32)[name = string("expand_dims_187")];
+            tensor<int32, [4]> concat_137 = const()[name = string("concat_137"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [1]> concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_138_values1_0 = const()[name = string("concat_138_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_138_values3_0 = const()[name = string("concat_138_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)];
+            bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, concat_138_values1_0, expand_dims_187, concat_138_values3_0))[name = string("concat_138")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_137, begin_mask = k_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_138, end_mask = k_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_23_stride_0, update = linear_44_cast_fp16, x = coreml_update_state_110)[name = string("k_cache2_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_23_cast_fp16, input = k_cache2)[name = string("coreml_update_state_112_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_112 = read_state(input = k_cache2)[name = string("coreml_update_state_112")];
+            tensor<int32, [3]> var_891_shape_cast_fp16 = shape(x = linear_45_cast_fp16)[name = string("op_891_shape_cast_fp16")];
+            int32 gather_45_axis_0 = const()[name = string("gather_45_axis_0"), val = int32(0)];
+            int32 gather_45_batch_dims_0 = const()[name = string("gather_45_batch_dims_0"), val = int32(0)];
+            bool gather_45_validate_indices_0 = const()[name = string("gather_45_validate_indices_0"), val = bool(false)];
+            string var_891_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_891_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_45_to_uint16 = const()[name = string("select_45_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_891_shape_cast_fp16_to_uint16 = cast(dtype = var_891_shape_cast_fp16_to_uint16_dtype_0, x = var_891_shape_cast_fp16)[name = string("cast_109")];
+            uint16 gather_45_cast_uint16 = gather(axis = gather_45_axis_0, batch_dims = gather_45_batch_dims_0, indices = select_45_to_uint16, validate_indices = gather_45_validate_indices_0, x = var_891_shape_cast_fp16_to_uint16)[name = string("gather_45_cast_uint16")];
+            string gather_45_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_45_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_191_axes_0 = const()[name = string("expand_dims_191_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_45_cast_uint16_to_int32 = cast(dtype = gather_45_cast_uint16_to_int32_dtype_0, x = gather_45_cast_uint16)[name = string("cast_108")];
+            tensor<int32, [1]> expand_dims_191 = expand_dims(axes = expand_dims_191_axes_0, x = gather_45_cast_uint16_to_int32)[name = string("expand_dims_191")];
+            tensor<int32, [4]> concat_140 = const()[name = string("concat_140"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [1]> concat_141_values0_0 = const()[name = string("concat_141_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_141_values1_0 = const()[name = string("concat_141_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_141_values3_0 = const()[name = string("concat_141_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_141_axis_0 = const()[name = string("concat_141_axis_0"), val = int32(0)];
+            bool concat_141_interleave_0 = const()[name = string("concat_141_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_141 = concat(axis = concat_141_axis_0, interleave = concat_141_interleave_0, values = (concat_141_values0_0, concat_141_values1_0, expand_dims_191, concat_141_values3_0))[name = string("concat_141")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_140, begin_mask = v_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_141, end_mask = v_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_23_stride_0, update = linear_45_cast_fp16, x = coreml_update_state_111)[name = string("v_cache2_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_23_cast_fp16, input = v_cache2)[name = string("coreml_update_state_113_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_113 = read_state(input = v_cache2)[name = string("coreml_update_state_113")];
+            tensor<fp16, [1280, 1280]> var_913_to_fp16 = const()[name = string("op_913_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187499008)))];
+            tensor<fp16, [1, ?, 1280]> linear_46_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_913_to_fp16, x = audio_data)[name = string("linear_46_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_917_to_fp16 = const()[name = string("op_917_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190775872)))];
+            tensor<fp16, [1280]> var_918_to_fp16 = const()[name = string("op_918_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194052736)))];
+            tensor<fp16, [1, ?, 1280]> linear_47_cast_fp16 = linear(bias = var_918_to_fp16, weight = var_917_to_fp16, x = audio_data)[name = string("linear_47_cast_fp16")];
+            tensor<int32, [3]> var_920_shape_cast_fp16 = shape(x = linear_46_cast_fp16)[name = string("op_920_shape_cast_fp16")];
+            int32 gather_46_axis_0 = const()[name = string("gather_46_axis_0"), val = int32(0)];
+            int32 gather_46_batch_dims_0 = const()[name = string("gather_46_batch_dims_0"), val = int32(0)];
+            bool gather_46_validate_indices_0 = const()[name = string("gather_46_validate_indices_0"), val = bool(false)];
+            string var_920_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_920_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_46_to_uint16 = const()[name = string("select_46_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_920_shape_cast_fp16_to_uint16 = cast(dtype = var_920_shape_cast_fp16_to_uint16_dtype_0, x = var_920_shape_cast_fp16)[name = string("cast_107")];
+            uint16 gather_46_cast_uint16 = gather(axis = gather_46_axis_0, batch_dims = gather_46_batch_dims_0, indices = select_46_to_uint16, validate_indices = gather_46_validate_indices_0, x = var_920_shape_cast_fp16_to_uint16)[name = string("gather_46_cast_uint16")];
+            string gather_46_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_46_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_46_cast_uint16_to_int32 = cast(dtype = gather_46_cast_uint16_to_int32_dtype_0, x = gather_46_cast_uint16)[name = string("cast_106")];
+            tensor<int32, [1]> expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = gather_46_cast_uint16_to_int32)[name = string("expand_dims_195")];
+            tensor<int32, [4]> concat_143 = const()[name = string("concat_143"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [1]> concat_144_values0_0 = const()[name = string("concat_144_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_144_values1_0 = const()[name = string("concat_144_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_144_values3_0 = const()[name = string("concat_144_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_144_axis_0 = const()[name = string("concat_144_axis_0"), val = int32(0)];
+            bool concat_144_interleave_0 = const()[name = string("concat_144_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_144 = concat(axis = concat_144_axis_0, interleave = concat_144_interleave_0, values = (concat_144_values0_0, concat_144_values1_0, expand_dims_195, concat_144_values3_0))[name = string("concat_144")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_143, begin_mask = k_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_144, end_mask = k_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_24_stride_0, update = linear_46_cast_fp16, x = coreml_update_state_112)[name = string("k_cache2_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_24_cast_fp16, input = k_cache2)[name = string("coreml_update_state_114_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_114 = read_state(input = k_cache2)[name = string("coreml_update_state_114")];
+            tensor<int32, [3]> var_925_shape_cast_fp16 = shape(x = linear_47_cast_fp16)[name = string("op_925_shape_cast_fp16")];
+            int32 gather_47_axis_0 = const()[name = string("gather_47_axis_0"), val = int32(0)];
+            int32 gather_47_batch_dims_0 = const()[name = string("gather_47_batch_dims_0"), val = int32(0)];
+            bool gather_47_validate_indices_0 = const()[name = string("gather_47_validate_indices_0"), val = bool(false)];
+            string var_925_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_925_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_47_to_uint16 = const()[name = string("select_47_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_925_shape_cast_fp16_to_uint16 = cast(dtype = var_925_shape_cast_fp16_to_uint16_dtype_0, x = var_925_shape_cast_fp16)[name = string("cast_105")];
+            uint16 gather_47_cast_uint16 = gather(axis = gather_47_axis_0, batch_dims = gather_47_batch_dims_0, indices = select_47_to_uint16, validate_indices = gather_47_validate_indices_0, x = var_925_shape_cast_fp16_to_uint16)[name = string("gather_47_cast_uint16")];
+            string gather_47_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_47_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_199_axes_0 = const()[name = string("expand_dims_199_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_47_cast_uint16_to_int32 = cast(dtype = gather_47_cast_uint16_to_int32_dtype_0, x = gather_47_cast_uint16)[name = string("cast_104")];
+            tensor<int32, [1]> expand_dims_199 = expand_dims(axes = expand_dims_199_axes_0, x = gather_47_cast_uint16_to_int32)[name = string("expand_dims_199")];
+            tensor<int32, [4]> concat_146 = const()[name = string("concat_146"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [1]> concat_147_values0_0 = const()[name = string("concat_147_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)];
+            bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (concat_147_values0_0, concat_147_values1_0, expand_dims_199, concat_147_values3_0))[name = string("concat_147")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_146, begin_mask = v_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_147, end_mask = v_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_24_stride_0, update = linear_47_cast_fp16, x = coreml_update_state_113)[name = string("v_cache2_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_24_cast_fp16, input = v_cache2)[name = string("coreml_update_state_115_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_115 = read_state(input = v_cache2)[name = string("coreml_update_state_115")];
+            tensor<fp16, [1280, 1280]> var_947_to_fp16 = const()[name = string("op_947_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194055360)))];
+            tensor<fp16, [1, ?, 1280]> linear_48_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_947_to_fp16, x = audio_data)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_951_to_fp16 = const()[name = string("op_951_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197332224)))];
+            tensor<fp16, [1280]> var_952_to_fp16 = const()[name = string("op_952_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200609088)))];
+            tensor<fp16, [1, ?, 1280]> linear_49_cast_fp16 = linear(bias = var_952_to_fp16, weight = var_951_to_fp16, x = audio_data)[name = string("linear_49_cast_fp16")];
+            tensor<int32, [3]> var_954_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_954_shape_cast_fp16")];
+            int32 gather_48_axis_0 = const()[name = string("gather_48_axis_0"), val = int32(0)];
+            int32 gather_48_batch_dims_0 = const()[name = string("gather_48_batch_dims_0"), val = int32(0)];
+            bool gather_48_validate_indices_0 = const()[name = string("gather_48_validate_indices_0"), val = bool(false)];
+            string var_954_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_954_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_48_to_uint16 = const()[name = string("select_48_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_954_shape_cast_fp16_to_uint16 = cast(dtype = var_954_shape_cast_fp16_to_uint16_dtype_0, x = var_954_shape_cast_fp16)[name = string("cast_103")];
+            uint16 gather_48_cast_uint16 = gather(axis = gather_48_axis_0, batch_dims = gather_48_batch_dims_0, indices = select_48_to_uint16, validate_indices = gather_48_validate_indices_0, x = var_954_shape_cast_fp16_to_uint16)[name = string("gather_48_cast_uint16")];
+            string gather_48_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_48_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_203_axes_0 = const()[name = string("expand_dims_203_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_48_cast_uint16_to_int32 = cast(dtype = gather_48_cast_uint16_to_int32_dtype_0, x = gather_48_cast_uint16)[name = string("cast_102")];
+            tensor<int32, [1]> expand_dims_203 = expand_dims(axes = expand_dims_203_axes_0, x = gather_48_cast_uint16_to_int32)[name = string("expand_dims_203")];
+            tensor<int32, [4]> concat_149 = const()[name = string("concat_149"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [1]> concat_150_values0_0 = const()[name = string("concat_150_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_150_values1_0 = const()[name = string("concat_150_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_150_values3_0 = const()[name = string("concat_150_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)];
+            bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (concat_150_values0_0, concat_150_values1_0, expand_dims_203, concat_150_values3_0))[name = string("concat_150")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_25_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_25_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_25_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_25_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_149, begin_mask = k_cache2_internal_tensor_assign_25_begin_mask_0, end = concat_150, end_mask = k_cache2_internal_tensor_assign_25_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_25_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_25_stride_0, update = linear_48_cast_fp16, x = coreml_update_state_114)[name = string("k_cache2_internal_tensor_assign_25_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_25_cast_fp16, input = k_cache2)[name = string("coreml_update_state_116_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_116 = read_state(input = k_cache2)[name = string("coreml_update_state_116")];
+            tensor<int32, [3]> var_959_shape_cast_fp16 = shape(x = linear_49_cast_fp16)[name = string("op_959_shape_cast_fp16")];
+            int32 gather_49_axis_0 = const()[name = string("gather_49_axis_0"), val = int32(0)];
+            int32 gather_49_batch_dims_0 = const()[name = string("gather_49_batch_dims_0"), val = int32(0)];
+            bool gather_49_validate_indices_0 = const()[name = string("gather_49_validate_indices_0"), val = bool(false)];
+            string var_959_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_959_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_49_to_uint16 = const()[name = string("select_49_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_959_shape_cast_fp16_to_uint16 = cast(dtype = var_959_shape_cast_fp16_to_uint16_dtype_0, x = var_959_shape_cast_fp16)[name = string("cast_101")];
+            uint16 gather_49_cast_uint16 = gather(axis = gather_49_axis_0, batch_dims = gather_49_batch_dims_0, indices = select_49_to_uint16, validate_indices = gather_49_validate_indices_0, x = var_959_shape_cast_fp16_to_uint16)[name = string("gather_49_cast_uint16")];
+            string gather_49_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_49_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_207_axes_0 = const()[name = string("expand_dims_207_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_49_cast_uint16_to_int32 = cast(dtype = gather_49_cast_uint16_to_int32_dtype_0, x = gather_49_cast_uint16)[name = string("cast_100")];
+            tensor<int32, [1]> expand_dims_207 = expand_dims(axes = expand_dims_207_axes_0, x = gather_49_cast_uint16_to_int32)[name = string("expand_dims_207")];
+            tensor<int32, [4]> concat_152 = const()[name = string("concat_152"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [1]> concat_153_values0_0 = const()[name = string("concat_153_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_153_values1_0 = const()[name = string("concat_153_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_153_values3_0 = const()[name = string("concat_153_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_153_axis_0 = const()[name = string("concat_153_axis_0"), val = int32(0)];
+            bool concat_153_interleave_0 = const()[name = string("concat_153_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_153 = concat(axis = concat_153_axis_0, interleave = concat_153_interleave_0, values = (concat_153_values0_0, concat_153_values1_0, expand_dims_207, concat_153_values3_0))[name = string("concat_153")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_25_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_25_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_25_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_25_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_cache2_internal_tensor_assign_25_begin_mask_0, end = concat_153, end_mask = v_cache2_internal_tensor_assign_25_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_25_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_25_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_115)[name = string("v_cache2_internal_tensor_assign_25_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_25_cast_fp16, input = v_cache2)[name = string("coreml_update_state_117_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_117 = read_state(input = v_cache2)[name = string("coreml_update_state_117")];
+            tensor<fp16, [1280, 1280]> var_981_to_fp16 = const()[name = string("op_981_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200611712)))];
+            tensor<fp16, [1, ?, 1280]> linear_50_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_981_to_fp16, x = audio_data)[name = string("linear_50_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_985_to_fp16 = const()[name = string("op_985_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203888576)))];
+            tensor<fp16, [1280]> var_986_to_fp16 = const()[name = string("op_986_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207165440)))];
+            tensor<fp16, [1, ?, 1280]> linear_51_cast_fp16 = linear(bias = var_986_to_fp16, weight = var_985_to_fp16, x = audio_data)[name = string("linear_51_cast_fp16")];
+            tensor<int32, [3]> var_988_shape_cast_fp16 = shape(x = linear_50_cast_fp16)[name = string("op_988_shape_cast_fp16")];
+            int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)];
+            int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)];
+            bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)];
+            string var_988_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_988_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_988_shape_cast_fp16_to_uint16 = cast(dtype = var_988_shape_cast_fp16_to_uint16_dtype_0, x = var_988_shape_cast_fp16)[name = string("cast_99")];
+            uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_988_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")];
+            string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_98")];
+            tensor<int32, [1]> expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = gather_50_cast_uint16_to_int32)[name = string("expand_dims_211")];
+            tensor<int32, [4]> concat_155 = const()[name = string("concat_155"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [1]> concat_156_values0_0 = const()[name = string("concat_156_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)];
+            bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (concat_156_values0_0, concat_156_values1_0, expand_dims_211, concat_156_values3_0))[name = string("concat_156")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_26_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_26_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_26_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_26_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_155, begin_mask = k_cache2_internal_tensor_assign_26_begin_mask_0, end = concat_156, end_mask = k_cache2_internal_tensor_assign_26_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_26_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_26_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_116)[name = string("k_cache2_internal_tensor_assign_26_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_26_cast_fp16, input = k_cache2)[name = string("coreml_update_state_118_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_118 = read_state(input = k_cache2)[name = string("coreml_update_state_118")];
+            tensor<int32, [3]> var_993_shape_cast_fp16 = shape(x = linear_51_cast_fp16)[name = string("op_993_shape_cast_fp16")];
+            int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)];
+            int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)];
+            bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)];
+            string var_993_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_993_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_51_to_uint16 = const()[name = string("select_51_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_993_shape_cast_fp16_to_uint16 = cast(dtype = var_993_shape_cast_fp16_to_uint16_dtype_0, x = var_993_shape_cast_fp16)[name = string("cast_97")];
+            uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_993_shape_cast_fp16_to_uint16)[name = string("gather_51_cast_uint16")];
+            string gather_51_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_51_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_215_axes_0 = const()[name = string("expand_dims_215_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_51_cast_uint16_to_int32 = cast(dtype = gather_51_cast_uint16_to_int32_dtype_0, x = gather_51_cast_uint16)[name = string("cast_96")];
+            tensor<int32, [1]> expand_dims_215 = expand_dims(axes = expand_dims_215_axes_0, x = gather_51_cast_uint16_to_int32)[name = string("expand_dims_215")];
+            tensor<int32, [4]> concat_158 = const()[name = string("concat_158"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [1]> concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)];
+            bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_215, concat_159_values3_0))[name = string("concat_159")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_26_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_26_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_26_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_26_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache2_internal_tensor_assign_26_begin_mask_0, end = concat_159, end_mask = v_cache2_internal_tensor_assign_26_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_26_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_26_stride_0, update = linear_51_cast_fp16, x = coreml_update_state_117)[name = string("v_cache2_internal_tensor_assign_26_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_26_cast_fp16, input = v_cache2)[name = string("coreml_update_state_119_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_119 = read_state(input = v_cache2)[name = string("coreml_update_state_119")];
+            tensor<fp16, [1280, 1280]> var_1015_to_fp16 = const()[name = string("op_1015_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207168064)))];
+            tensor<fp16, [1, ?, 1280]> linear_52_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1015_to_fp16, x = audio_data)[name = string("linear_52_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1019_to_fp16 = const()[name = string("op_1019_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210444928)))];
+            tensor<fp16, [1280]> var_1020_to_fp16 = const()[name = string("op_1020_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213721792)))];
+            tensor<fp16, [1, ?, 1280]> linear_53_cast_fp16 = linear(bias = var_1020_to_fp16, weight = var_1019_to_fp16, x = audio_data)[name = string("linear_53_cast_fp16")];
+            tensor<int32, [3]> var_1022_shape_cast_fp16 = shape(x = linear_52_cast_fp16)[name = string("op_1022_shape_cast_fp16")];
+            int32 gather_52_axis_0 = const()[name = string("gather_52_axis_0"), val = int32(0)];
+            int32 gather_52_batch_dims_0 = const()[name = string("gather_52_batch_dims_0"), val = int32(0)];
+            bool gather_52_validate_indices_0 = const()[name = string("gather_52_validate_indices_0"), val = bool(false)];
+            string var_1022_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1022_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_52_to_uint16 = const()[name = string("select_52_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1022_shape_cast_fp16_to_uint16 = cast(dtype = var_1022_shape_cast_fp16_to_uint16_dtype_0, x = var_1022_shape_cast_fp16)[name = string("cast_95")];
+            uint16 gather_52_cast_uint16 = gather(axis = gather_52_axis_0, batch_dims = gather_52_batch_dims_0, indices = select_52_to_uint16, validate_indices = gather_52_validate_indices_0, x = var_1022_shape_cast_fp16_to_uint16)[name = string("gather_52_cast_uint16")];
+            string gather_52_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_52_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_219_axes_0 = const()[name = string("expand_dims_219_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_52_cast_uint16_to_int32 = cast(dtype = gather_52_cast_uint16_to_int32_dtype_0, x = gather_52_cast_uint16)[name = string("cast_94")];
+            tensor<int32, [1]> expand_dims_219 = expand_dims(axes = expand_dims_219_axes_0, x = gather_52_cast_uint16_to_int32)[name = string("expand_dims_219")];
+            tensor<int32, [4]> concat_161 = const()[name = string("concat_161"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [1]> concat_162_values0_0 = const()[name = string("concat_162_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_162_values1_0 = const()[name = string("concat_162_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_162_values3_0 = const()[name = string("concat_162_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_162_axis_0 = const()[name = string("concat_162_axis_0"), val = int32(0)];
+            bool concat_162_interleave_0 = const()[name = string("concat_162_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_162 = concat(axis = concat_162_axis_0, interleave = concat_162_interleave_0, values = (concat_162_values0_0, concat_162_values1_0, expand_dims_219, concat_162_values3_0))[name = string("concat_162")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_27_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_27_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_27_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_27_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_161, begin_mask = k_cache2_internal_tensor_assign_27_begin_mask_0, end = concat_162, end_mask = k_cache2_internal_tensor_assign_27_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_27_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_27_stride_0, update = linear_52_cast_fp16, x = coreml_update_state_118)[name = string("k_cache2_internal_tensor_assign_27_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_27_cast_fp16, input = k_cache2)[name = string("coreml_update_state_120_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_120 = read_state(input = k_cache2)[name = string("coreml_update_state_120")];
+            tensor<int32, [3]> var_1027_shape_cast_fp16 = shape(x = linear_53_cast_fp16)[name = string("op_1027_shape_cast_fp16")];
+            int32 gather_53_axis_0 = const()[name = string("gather_53_axis_0"), val = int32(0)];
+            int32 gather_53_batch_dims_0 = const()[name = string("gather_53_batch_dims_0"), val = int32(0)];
+            bool gather_53_validate_indices_0 = const()[name = string("gather_53_validate_indices_0"), val = bool(false)];
+            string var_1027_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1027_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_53_to_uint16 = const()[name = string("select_53_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1027_shape_cast_fp16_to_uint16 = cast(dtype = var_1027_shape_cast_fp16_to_uint16_dtype_0, x = var_1027_shape_cast_fp16)[name = string("cast_93")];
+            uint16 gather_53_cast_uint16 = gather(axis = gather_53_axis_0, batch_dims = gather_53_batch_dims_0, indices = select_53_to_uint16, validate_indices = gather_53_validate_indices_0, x = var_1027_shape_cast_fp16_to_uint16)[name = string("gather_53_cast_uint16")];
+            string gather_53_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_53_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_223_axes_0 = const()[name = string("expand_dims_223_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_53_cast_uint16_to_int32 = cast(dtype = gather_53_cast_uint16_to_int32_dtype_0, x = gather_53_cast_uint16)[name = string("cast_92")];
+            tensor<int32, [1]> expand_dims_223 = expand_dims(axes = expand_dims_223_axes_0, x = gather_53_cast_uint16_to_int32)[name = string("expand_dims_223")];
+            tensor<int32, [4]> concat_164 = const()[name = string("concat_164"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [1]> concat_165_values0_0 = const()[name = string("concat_165_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)];
+            bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (concat_165_values0_0, concat_165_values1_0, expand_dims_223, concat_165_values3_0))[name = string("concat_165")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_27_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_27_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_27_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_27_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_164, begin_mask = v_cache2_internal_tensor_assign_27_begin_mask_0, end = concat_165, end_mask = v_cache2_internal_tensor_assign_27_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_27_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_27_stride_0, update = linear_53_cast_fp16, x = coreml_update_state_119)[name = string("v_cache2_internal_tensor_assign_27_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_27_cast_fp16, input = v_cache2)[name = string("coreml_update_state_121_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_121 = read_state(input = v_cache2)[name = string("coreml_update_state_121")];
+            tensor<fp16, [1280, 1280]> var_1049_to_fp16 = const()[name = string("op_1049_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213724416)))];
+            tensor<fp16, [1, ?, 1280]> linear_54_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1049_to_fp16, x = audio_data)[name = string("linear_54_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1053_to_fp16 = const()[name = string("op_1053_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217001280)))];
+            tensor<fp16, [1280]> var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220278144)))];
+            tensor<fp16, [1, ?, 1280]> linear_55_cast_fp16 = linear(bias = var_1054_to_fp16, weight = var_1053_to_fp16, x = audio_data)[name = string("linear_55_cast_fp16")];
+            tensor<int32, [3]> var_1056_shape_cast_fp16 = shape(x = linear_54_cast_fp16)[name = string("op_1056_shape_cast_fp16")];
+            int32 gather_54_axis_0 = const()[name = string("gather_54_axis_0"), val = int32(0)];
+            int32 gather_54_batch_dims_0 = const()[name = string("gather_54_batch_dims_0"), val = int32(0)];
+            bool gather_54_validate_indices_0 = const()[name = string("gather_54_validate_indices_0"), val = bool(false)];
+            string var_1056_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1056_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_54_to_uint16 = const()[name = string("select_54_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1056_shape_cast_fp16_to_uint16 = cast(dtype = var_1056_shape_cast_fp16_to_uint16_dtype_0, x = var_1056_shape_cast_fp16)[name = string("cast_91")];
+            uint16 gather_54_cast_uint16 = gather(axis = gather_54_axis_0, batch_dims = gather_54_batch_dims_0, indices = select_54_to_uint16, validate_indices = gather_54_validate_indices_0, x = var_1056_shape_cast_fp16_to_uint16)[name = string("gather_54_cast_uint16")];
+            string gather_54_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_54_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_54_cast_uint16_to_int32 = cast(dtype = gather_54_cast_uint16_to_int32_dtype_0, x = gather_54_cast_uint16)[name = string("cast_90")];
+            tensor<int32, [1]> expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = gather_54_cast_uint16_to_int32)[name = string("expand_dims_227")];
+            tensor<int32, [4]> concat_167 = const()[name = string("concat_167"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [1]> concat_168_values0_0 = const()[name = string("concat_168_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_168_values1_0 = const()[name = string("concat_168_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_168_values3_0 = const()[name = string("concat_168_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)];
+            bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (concat_168_values0_0, concat_168_values1_0, expand_dims_227, concat_168_values3_0))[name = string("concat_168")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_28_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_28_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_28_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_28_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_167, begin_mask = k_cache2_internal_tensor_assign_28_begin_mask_0, end = concat_168, end_mask = k_cache2_internal_tensor_assign_28_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_28_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_28_stride_0, update = linear_54_cast_fp16, x = coreml_update_state_120)[name = string("k_cache2_internal_tensor_assign_28_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_28_cast_fp16, input = k_cache2)[name = string("coreml_update_state_122_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_122 = read_state(input = k_cache2)[name = string("coreml_update_state_122")];
+            tensor<int32, [3]> var_1061_shape_cast_fp16 = shape(x = linear_55_cast_fp16)[name = string("op_1061_shape_cast_fp16")];
+            int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)];
+            int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)];
+            bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)];
+            string var_1061_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1061_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1061_shape_cast_fp16_to_uint16 = cast(dtype = var_1061_shape_cast_fp16_to_uint16_dtype_0, x = var_1061_shape_cast_fp16)[name = string("cast_89")];
+            uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_1061_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")];
+            string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_231_axes_0 = const()[name = string("expand_dims_231_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_88")];
+            tensor<int32, [1]> expand_dims_231 = expand_dims(axes = expand_dims_231_axes_0, x = gather_55_cast_uint16_to_int32)[name = string("expand_dims_231")];
+            tensor<int32, [4]> concat_170 = const()[name = string("concat_170"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [1]> concat_171_values0_0 = const()[name = string("concat_171_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_171_values1_0 = const()[name = string("concat_171_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)];
+            bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (concat_171_values0_0, concat_171_values1_0, expand_dims_231, concat_171_values3_0))[name = string("concat_171")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_28_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_28_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_28_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_28_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_170, begin_mask = v_cache2_internal_tensor_assign_28_begin_mask_0, end = concat_171, end_mask = v_cache2_internal_tensor_assign_28_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_28_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_28_stride_0, update = linear_55_cast_fp16, x = coreml_update_state_121)[name = string("v_cache2_internal_tensor_assign_28_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_28_cast_fp16, input = v_cache2)[name = string("coreml_update_state_123_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_123 = read_state(input = v_cache2)[name = string("coreml_update_state_123")];
+            tensor<fp16, [1280, 1280]> var_1083_to_fp16 = const()[name = string("op_1083_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220280768)))];
+            tensor<fp16, [1, ?, 1280]> linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1083_to_fp16, x = audio_data)[name = string("linear_56_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1087_to_fp16 = const()[name = string("op_1087_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223557632)))];
+            tensor<fp16, [1280]> var_1088_to_fp16 = const()[name = string("op_1088_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226834496)))];
+            tensor<fp16, [1, ?, 1280]> linear_57_cast_fp16 = linear(bias = var_1088_to_fp16, weight = var_1087_to_fp16, x = audio_data)[name = string("linear_57_cast_fp16")];
+            tensor<int32, [3]> var_1090_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1090_shape_cast_fp16")];
+            int32 gather_56_axis_0 = const()[name = string("gather_56_axis_0"), val = int32(0)];
+            int32 gather_56_batch_dims_0 = const()[name = string("gather_56_batch_dims_0"), val = int32(0)];
+            bool gather_56_validate_indices_0 = const()[name = string("gather_56_validate_indices_0"), val = bool(false)];
+            string var_1090_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1090_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_56_to_uint16 = const()[name = string("select_56_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1090_shape_cast_fp16_to_uint16 = cast(dtype = var_1090_shape_cast_fp16_to_uint16_dtype_0, x = var_1090_shape_cast_fp16)[name = string("cast_87")];
+            uint16 gather_56_cast_uint16 = gather(axis = gather_56_axis_0, batch_dims = gather_56_batch_dims_0, indices = select_56_to_uint16, validate_indices = gather_56_validate_indices_0, x = var_1090_shape_cast_fp16_to_uint16)[name = string("gather_56_cast_uint16")];
+            string gather_56_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_56_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_235_axes_0 = const()[name = string("expand_dims_235_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_56_cast_uint16_to_int32 = cast(dtype = gather_56_cast_uint16_to_int32_dtype_0, x = gather_56_cast_uint16)[name = string("cast_86")];
+            tensor<int32, [1]> expand_dims_235 = expand_dims(axes = expand_dims_235_axes_0, x = gather_56_cast_uint16_to_int32)[name = string("expand_dims_235")];
+            tensor<int32, [4]> concat_173 = const()[name = string("concat_173"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [1]> concat_174_values0_0 = const()[name = string("concat_174_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_174_values1_0 = const()[name = string("concat_174_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_174_values3_0 = const()[name = string("concat_174_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_174_axis_0 = const()[name = string("concat_174_axis_0"), val = int32(0)];
+            bool concat_174_interleave_0 = const()[name = string("concat_174_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_174 = concat(axis = concat_174_axis_0, interleave = concat_174_interleave_0, values = (concat_174_values0_0, concat_174_values1_0, expand_dims_235, concat_174_values3_0))[name = string("concat_174")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_29_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_29_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_29_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_29_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_173, begin_mask = k_cache2_internal_tensor_assign_29_begin_mask_0, end = concat_174, end_mask = k_cache2_internal_tensor_assign_29_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_29_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_29_stride_0, update = linear_56_cast_fp16, x = coreml_update_state_122)[name = string("k_cache2_internal_tensor_assign_29_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_29_cast_fp16, input = k_cache2)[name = string("coreml_update_state_124_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_124 = read_state(input = k_cache2)[name = string("coreml_update_state_124")];
+            tensor<int32, [3]> var_1095_shape_cast_fp16 = shape(x = linear_57_cast_fp16)[name = string("op_1095_shape_cast_fp16")];
+            int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)];
+            int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)];
+            bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)];
+            string var_1095_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1095_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_57_to_uint16 = const()[name = string("select_57_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1095_shape_cast_fp16_to_uint16 = cast(dtype = var_1095_shape_cast_fp16_to_uint16_dtype_0, x = var_1095_shape_cast_fp16)[name = string("cast_85")];
+            uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_1095_shape_cast_fp16_to_uint16)[name = string("gather_57_cast_uint16")];
+            string gather_57_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_57_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_239_axes_0 = const()[name = string("expand_dims_239_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_57_cast_uint16_to_int32 = cast(dtype = gather_57_cast_uint16_to_int32_dtype_0, x = gather_57_cast_uint16)[name = string("cast_84")];
+            tensor<int32, [1]> expand_dims_239 = expand_dims(axes = expand_dims_239_axes_0, x = gather_57_cast_uint16_to_int32)[name = string("expand_dims_239")];
+            tensor<int32, [4]> concat_176 = const()[name = string("concat_176"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [1]> concat_177_values0_0 = const()[name = string("concat_177_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_177_values1_0 = const()[name = string("concat_177_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_177_values3_0 = const()[name = string("concat_177_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_177_axis_0 = const()[name = string("concat_177_axis_0"), val = int32(0)];
+            bool concat_177_interleave_0 = const()[name = string("concat_177_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_177 = concat(axis = concat_177_axis_0, interleave = concat_177_interleave_0, values = (concat_177_values0_0, concat_177_values1_0, expand_dims_239, concat_177_values3_0))[name = string("concat_177")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_29_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_29_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_29_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_29_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_176, begin_mask = v_cache2_internal_tensor_assign_29_begin_mask_0, end = concat_177, end_mask = v_cache2_internal_tensor_assign_29_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_29_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_29_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_123)[name = string("v_cache2_internal_tensor_assign_29_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_29_cast_fp16, input = v_cache2)[name = string("coreml_update_state_125_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_125 = read_state(input = v_cache2)[name = string("coreml_update_state_125")];
+            tensor<fp16, [1280, 1280]> var_1117_to_fp16 = const()[name = string("op_1117_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226837120)))];
+            tensor<fp16, [1, ?, 1280]> linear_58_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1117_to_fp16, x = audio_data)[name = string("linear_58_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1121_to_fp16 = const()[name = string("op_1121_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230113984)))];
+            tensor<fp16, [1280]> var_1122_to_fp16 = const()[name = string("op_1122_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233390848)))];
+            tensor<fp16, [1, ?, 1280]> linear_59_cast_fp16 = linear(bias = var_1122_to_fp16, weight = var_1121_to_fp16, x = audio_data)[name = string("linear_59_cast_fp16")];
+            tensor<int32, [3]> var_1124_shape_cast_fp16 = shape(x = linear_58_cast_fp16)[name = string("op_1124_shape_cast_fp16")];
+            int32 gather_58_axis_0 = const()[name = string("gather_58_axis_0"), val = int32(0)];
+            int32 gather_58_batch_dims_0 = const()[name = string("gather_58_batch_dims_0"), val = int32(0)];
+            bool gather_58_validate_indices_0 = const()[name = string("gather_58_validate_indices_0"), val = bool(false)];
+            string var_1124_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1124_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_58_to_uint16 = const()[name = string("select_58_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1124_shape_cast_fp16_to_uint16 = cast(dtype = var_1124_shape_cast_fp16_to_uint16_dtype_0, x = var_1124_shape_cast_fp16)[name = string("cast_83")];
+            uint16 gather_58_cast_uint16 = gather(axis = gather_58_axis_0, batch_dims = gather_58_batch_dims_0, indices = select_58_to_uint16, validate_indices = gather_58_validate_indices_0, x = var_1124_shape_cast_fp16_to_uint16)[name = string("gather_58_cast_uint16")];
+            string gather_58_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_58_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_58_cast_uint16_to_int32 = cast(dtype = gather_58_cast_uint16_to_int32_dtype_0, x = gather_58_cast_uint16)[name = string("cast_82")];
+            tensor<int32, [1]> expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = gather_58_cast_uint16_to_int32)[name = string("expand_dims_243")];
+            tensor<int32, [4]> concat_179 = const()[name = string("concat_179"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [1]> concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)];
+            bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, concat_180_values1_0, expand_dims_243, concat_180_values3_0))[name = string("concat_180")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_30_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_30_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_30_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_30_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_179, begin_mask = k_cache2_internal_tensor_assign_30_begin_mask_0, end = concat_180, end_mask = k_cache2_internal_tensor_assign_30_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_30_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_30_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_124)[name = string("k_cache2_internal_tensor_assign_30_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_30_cast_fp16, input = k_cache2)[name = string("coreml_update_state_126_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_126 = read_state(input = k_cache2)[name = string("coreml_update_state_126")];
+            tensor<int32, [3]> var_1129_shape_cast_fp16 = shape(x = linear_59_cast_fp16)[name = string("op_1129_shape_cast_fp16")];
+            int32 gather_59_axis_0 = const()[name = string("gather_59_axis_0"), val = int32(0)];
+            int32 gather_59_batch_dims_0 = const()[name = string("gather_59_batch_dims_0"), val = int32(0)];
+            bool gather_59_validate_indices_0 = const()[name = string("gather_59_validate_indices_0"), val = bool(false)];
+            string var_1129_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1129_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_59_to_uint16 = const()[name = string("select_59_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1129_shape_cast_fp16_to_uint16 = cast(dtype = var_1129_shape_cast_fp16_to_uint16_dtype_0, x = var_1129_shape_cast_fp16)[name = string("cast_81")];
+            uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = select_59_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_1129_shape_cast_fp16_to_uint16)[name = string("gather_59_cast_uint16")];
+            string gather_59_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_59_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_247_axes_0 = const()[name = string("expand_dims_247_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_59_cast_uint16_to_int32 = cast(dtype = gather_59_cast_uint16_to_int32_dtype_0, x = gather_59_cast_uint16)[name = string("cast_80")];
+            tensor<int32, [1]> expand_dims_247 = expand_dims(axes = expand_dims_247_axes_0, x = gather_59_cast_uint16_to_int32)[name = string("expand_dims_247")];
+            tensor<int32, [4]> concat_182 = const()[name = string("concat_182"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [1]> concat_183_values0_0 = const()[name = string("concat_183_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)];
+            bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (concat_183_values0_0, concat_183_values1_0, expand_dims_247, concat_183_values3_0))[name = string("concat_183")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_30_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_30_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_30_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_30_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_182, begin_mask = v_cache2_internal_tensor_assign_30_begin_mask_0, end = concat_183, end_mask = v_cache2_internal_tensor_assign_30_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_30_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_30_stride_0, update = linear_59_cast_fp16, x = coreml_update_state_125)[name = string("v_cache2_internal_tensor_assign_30_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_30_cast_fp16, input = v_cache2)[name = string("coreml_update_state_127_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_127 = read_state(input = v_cache2)[name = string("coreml_update_state_127")];
+            tensor<fp16, [1280, 1280]> var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233393472)))];
+            tensor<fp16, [1, ?, 1280]> linear_60_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1151_to_fp16, x = audio_data)[name = string("linear_60_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1155_to_fp16 = const()[name = string("op_1155_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236670336)))];
+            tensor<fp16, [1280]> var_1156_to_fp16 = const()[name = string("op_1156_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239947200)))];
+            tensor<fp16, [1, ?, 1280]> linear_61_cast_fp16 = linear(bias = var_1156_to_fp16, weight = var_1155_to_fp16, x = audio_data)[name = string("linear_61_cast_fp16")];
+            tensor<int32, [3]> var_1158_shape_cast_fp16 = shape(x = linear_60_cast_fp16)[name = string("op_1158_shape_cast_fp16")];
+            int32 gather_60_axis_0 = const()[name = string("gather_60_axis_0"), val = int32(0)];
+            int32 gather_60_batch_dims_0 = const()[name = string("gather_60_batch_dims_0"), val = int32(0)];
+            bool gather_60_validate_indices_0 = const()[name = string("gather_60_validate_indices_0"), val = bool(false)];
+            string var_1158_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1158_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_60_to_uint16 = const()[name = string("select_60_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1158_shape_cast_fp16_to_uint16 = cast(dtype = var_1158_shape_cast_fp16_to_uint16_dtype_0, x = var_1158_shape_cast_fp16)[name = string("cast_79")];
+            uint16 gather_60_cast_uint16 = gather(axis = gather_60_axis_0, batch_dims = gather_60_batch_dims_0, indices = select_60_to_uint16, validate_indices = gather_60_validate_indices_0, x = var_1158_shape_cast_fp16_to_uint16)[name = string("gather_60_cast_uint16")];
+            string gather_60_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_60_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_251_axes_0 = const()[name = string("expand_dims_251_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_60_cast_uint16_to_int32 = cast(dtype = gather_60_cast_uint16_to_int32_dtype_0, x = gather_60_cast_uint16)[name = string("cast_78")];
+            tensor<int32, [1]> expand_dims_251 = expand_dims(axes = expand_dims_251_axes_0, x = gather_60_cast_uint16_to_int32)[name = string("expand_dims_251")];
+            tensor<int32, [4]> concat_185 = const()[name = string("concat_185"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [1]> concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_186_values1_0 = const()[name = string("concat_186_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_186_values3_0 = const()[name = string("concat_186_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)];
+            bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, concat_186_values1_0, expand_dims_251, concat_186_values3_0))[name = string("concat_186")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_31_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_31_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_31_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_31_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_185, begin_mask = k_cache2_internal_tensor_assign_31_begin_mask_0, end = concat_186, end_mask = k_cache2_internal_tensor_assign_31_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_31_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_31_stride_0, update = linear_60_cast_fp16, x = coreml_update_state_126)[name = string("k_cache2_internal_tensor_assign_31_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_31_cast_fp16, input = k_cache2)[name = string("coreml_update_state_128_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_128 = read_state(input = k_cache2)[name = string("coreml_update_state_128")];
+            tensor<int32, [3]> var_1163_shape_cast_fp16 = shape(x = linear_61_cast_fp16)[name = string("op_1163_shape_cast_fp16")];
+            int32 gather_61_axis_0 = const()[name = string("gather_61_axis_0"), val = int32(0)];
+            int32 gather_61_batch_dims_0 = const()[name = string("gather_61_batch_dims_0"), val = int32(0)];
+            bool gather_61_validate_indices_0 = const()[name = string("gather_61_validate_indices_0"), val = bool(false)];
+            string var_1163_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1163_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_61_to_uint16 = const()[name = string("select_61_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1163_shape_cast_fp16_to_uint16 = cast(dtype = var_1163_shape_cast_fp16_to_uint16_dtype_0, x = var_1163_shape_cast_fp16)[name = string("cast_77")];
+            uint16 gather_61_cast_uint16 = gather(axis = gather_61_axis_0, batch_dims = gather_61_batch_dims_0, indices = select_61_to_uint16, validate_indices = gather_61_validate_indices_0, x = var_1163_shape_cast_fp16_to_uint16)[name = string("gather_61_cast_uint16")];
+            string gather_61_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_61_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_255_axes_0 = const()[name = string("expand_dims_255_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_61_cast_uint16_to_int32 = cast(dtype = gather_61_cast_uint16_to_int32_dtype_0, x = gather_61_cast_uint16)[name = string("cast_76")];
+            tensor<int32, [1]> expand_dims_255 = expand_dims(axes = expand_dims_255_axes_0, x = gather_61_cast_uint16_to_int32)[name = string("expand_dims_255")];
+            tensor<int32, [4]> concat_188 = const()[name = string("concat_188"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [1]> concat_189_values0_0 = const()[name = string("concat_189_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_189_values1_0 = const()[name = string("concat_189_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_189_values3_0 = const()[name = string("concat_189_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_189_axis_0 = const()[name = string("concat_189_axis_0"), val = int32(0)];
+            bool concat_189_interleave_0 = const()[name = string("concat_189_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_189 = concat(axis = concat_189_axis_0, interleave = concat_189_interleave_0, values = (concat_189_values0_0, concat_189_values1_0, expand_dims_255, concat_189_values3_0))[name = string("concat_189")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_31_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_31_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_31_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_31_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_188, begin_mask = v_cache2_internal_tensor_assign_31_begin_mask_0, end = concat_189, end_mask = v_cache2_internal_tensor_assign_31_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_31_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_31_stride_0, update = linear_61_cast_fp16, x = coreml_update_state_127)[name = string("v_cache2_internal_tensor_assign_31_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_31_cast_fp16, input = v_cache2)[name = string("coreml_update_state_129_write_state")];
+            tensor<fp16, [32, 1, 1500, 1280]> coreml_update_state_129 = read_state(input = v_cache2)[name = string("coreml_update_state_129")];
+            tensor<fp16, [1280, 1280]> var_1185_to_fp16 = const()[name = string("op_1185_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239949824)))];
+            tensor<fp16, [1, ?, 1280]> linear_62_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1185_to_fp16, x = audio_data)[name = string("linear_62_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243226688)))];
+            tensor<fp16, [1280]> var_1190_to_fp16 = const()[name = string("op_1190_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246503552)))];
+            tensor<fp16, [1, ?, 1280]> linear_63_cast_fp16 = linear(bias = var_1190_to_fp16, weight = var_1189_to_fp16, x = audio_data)[name = string("linear_63_cast_fp16")];
+            tensor<int32, [3]> var_1192_shape_cast_fp16 = shape(x = linear_62_cast_fp16)[name = string("op_1192_shape_cast_fp16")];
+            int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)];
+            int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)];
+            bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)];
+            string var_1192_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1192_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1192_shape_cast_fp16_to_uint16 = cast(dtype = var_1192_shape_cast_fp16_to_uint16_dtype_0, x = var_1192_shape_cast_fp16)[name = string("cast_75")];
+            uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1192_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")];
+            string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_74")];
+            tensor<int32, [1]> expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = gather_62_cast_uint16_to_int32)[name = string("expand_dims_259")];
+            tensor<int32, [4]> concat_191 = const()[name = string("concat_191"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [1]> concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_192_values1_0 = const()[name = string("concat_192_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_192_values3_0 = const()[name = string("concat_192_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)];
+            bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, concat_192_values1_0, expand_dims_259, concat_192_values3_0))[name = string("concat_192")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_32_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_32_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_32_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_32_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> k_cache2_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_191, begin_mask = k_cache2_internal_tensor_assign_32_begin_mask_0, end = concat_192, end_mask = k_cache2_internal_tensor_assign_32_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_32_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_32_stride_0, update = linear_62_cast_fp16, x = coreml_update_state_128)[name = string("k_cache2_internal_tensor_assign_32_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_32_cast_fp16, input = k_cache2)[name = string("coreml_update_state_130_write_state")];
+            tensor<int32, [3]> var_1197_shape_cast_fp16 = shape(x = linear_63_cast_fp16)[name = string("op_1197_shape_cast_fp16")];
+            int32 gather_63_axis_0 = const()[name = string("gather_63_axis_0"), val = int32(0)];
+            int32 gather_63_batch_dims_0 = const()[name = string("gather_63_batch_dims_0"), val = int32(0)];
+            bool gather_63_validate_indices_0 = const()[name = string("gather_63_validate_indices_0"), val = bool(false)];
+            string var_1197_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1197_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_63_to_uint16 = const()[name = string("select_63_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1197_shape_cast_fp16_to_uint16 = cast(dtype = var_1197_shape_cast_fp16_to_uint16_dtype_0, x = var_1197_shape_cast_fp16)[name = string("cast_73")];
+            uint16 gather_63_cast_uint16 = gather(axis = gather_63_axis_0, batch_dims = gather_63_batch_dims_0, indices = select_63_to_uint16, validate_indices = gather_63_validate_indices_0, x = var_1197_shape_cast_fp16_to_uint16)[name = string("gather_63_cast_uint16")];
+            string gather_63_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_63_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_263_axes_0 = const()[name = string("expand_dims_263_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_63_cast_uint16_to_int32 = cast(dtype = gather_63_cast_uint16_to_int32_dtype_0, x = gather_63_cast_uint16)[name = string("cast_72")];
+            tensor<int32, [1]> expand_dims_263 = expand_dims(axes = expand_dims_263_axes_0, x = gather_63_cast_uint16_to_int32)[name = string("expand_dims_263")];
+            tensor<int32, [4]> concat_194 = const()[name = string("concat_194"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [1]> concat_195_values0_0 = const()[name = string("concat_195_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_195_values1_0 = const()[name = string("concat_195_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_195_values3_0 = const()[name = string("concat_195_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)];
+            bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (concat_195_values0_0, concat_195_values1_0, expand_dims_263, concat_195_values3_0))[name = string("concat_195")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_32_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_32_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_32_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_32_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 1500, 1280]> v_cache2_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_194, begin_mask = v_cache2_internal_tensor_assign_32_begin_mask_0, end = concat_195, end_mask = v_cache2_internal_tensor_assign_32_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_32_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_32_stride_0, update = linear_63_cast_fp16, x = coreml_update_state_129)[name = string("v_cache2_internal_tensor_assign_32_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_32_cast_fp16, input = v_cache2)[name = string("coreml_update_state_131_write_state")];
+        } -> (dummy);
+}
\ No newline at end of file
diff --git a/large-v3/decoder_first.mlmodelc/weights/weight.bin b/large-v3/decoder_first.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..eb46e5fd8b19039b4494818092abc9e777f54b54
--- /dev/null
+++ b/large-v3/decoder_first.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:346ed969f2a1ddb144c4add194c7b2a9a7d7b4a2e536d1e4a2afbfe5a4f62818
+size 246506176
diff --git a/large-v3/decoder_second.mlmodelc/analytics/coremldata.bin b/large-v3/decoder_second.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..259e441785b4f9b0bd793e92eb8ef632348844f2
--- /dev/null
+++ b/large-v3/decoder_second.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e4f1e5461c9555f7720e35c3cf749dee1a467277881458a87b7f2d35016831c
+size 243
diff --git a/large-v3/decoder_second.mlmodelc/coremldata.bin b/large-v3/decoder_second.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06d384eab53d9890f25a9f07a76b3771dcd2b170
--- /dev/null
+++ b/large-v3/decoder_second.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5601244df54c60a16c26b761742867d06c6ef440ab8b0776ce5f6d1b4875c95
+size 487
diff --git a/large-v3/decoder_second.mlmodelc/metadata.json b/large-v3/decoder_second.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..722e4912d37eb8c71f0d55eb4ea48b33db80210d
--- /dev/null
+++ b/large-v3/decoder_second.mlmodelc/metadata.json
@@ -0,0 +1,127 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.linear" : 257,
+      "Ios18.readState" : 66,
+      "Ios18.expandDims" : 33,
+      "Ios18.sub" : 1,
+      "Ios18.matmul" : 128,
+      "Ios18.gelu" : 32,
+      "Ios18.gather" : 35,
+      "Ios18.concat" : 162,
+      "Shape" : 34,
+      "Ios18.add" : 161,
+      "Ios18.sliceUpdate" : 128,
+      "Ios18.sliceByIndex" : 257,
+      "Ios18.layerNorm" : 97,
+      "Ios18.cast" : 68,
+      "Ios18.transpose" : 256,
+      "Ios18.writeState" : 64,
+      "Ios18.reshape" : 256,
+      "Ios18.softmax" : 64,
+      "Ios18.mul" : 128
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 448 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 448, 1280]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 448 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 448, 1280]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 1500, 1280]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[32, 1, 1500, 1280]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Int32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Int32 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "token_data",
+        "shortDescription" : ""
+      },
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Float16 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "offset_mask",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_second",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/large-v3/decoder_second.mlmodelc/model.mil b/large-v3/decoder_second.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..c7037f845098a20f94eecbcc56cf6eafa648d786
--- /dev/null
+++ b/large-v3/decoder_second.mlmodelc/model.mil
@@ -0,0 +1,6298 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(state<tensor<fp16, [32, 1, 448, 1280]>> k_cache1, state<tensor<fp16, [32, 1, 1500, 1280]>> k_cache2, tensor<fp16, [1, ?]> offset_mask, tensor<int32, [1, ?]> token_data, state<tensor<fp16, [32, 1, 448, 1280]>> v_cache1, state<tensor<fp16, [32, 1, 1500, 1280]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] {
+            tensor<int32, [2]> var_78_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_78_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_78_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_78_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [2]> var_78_shape_cast_fp16_to_int16 = cast(dtype = var_78_shape_cast_fp16_to_int16_dtype_0, x = var_78_shape_cast_fp16)[name = string("cast_394")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_78_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [2]> var_82_shape = shape(x = token_data)[name = string("op_82_shape")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_82_shape_to_uint16_dtype_0 = const()[name = string("op_82_shape_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [2]> var_82_shape_to_uint16 = cast(dtype = var_82_shape_to_uint16_dtype_0, x = var_82_shape)[name = string("cast_392")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_82_shape_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_391")];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_393")];
+            int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")];
+            int32 var_154_axis_0 = const()[name = string("op_154_axis_0"), val = int32(0)];
+            int32 var_154_batch_dims_0 = const()[name = string("op_154_batch_dims_0"), val = int32(0)];
+            bool var_154_validate_indices_0 = const()[name = string("op_154_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [51866, 1280]> token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor<fp16, [51866, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, ?, 1280]> var_154_cast_fp16 = gather(axis = var_154_axis_0, batch_dims = var_154_batch_dims_0, indices = token_data, validate_indices = var_154_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_154_cast_fp16")];
+            int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)];
+            int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)];
+            bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")];
+            int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(1280)];
+            int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)];
+            bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")];
+            tensor<bool, [2]> var_157_end_mask_0 = const()[name = string("op_157_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 1280]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [448, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132777088)))];
+            tensor<fp16, [?, ?]> var_157_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_157_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_157_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_3_cast_fp16 = add(x = var_154_cast_fp16, y = var_157_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<fp16, [32, 1, 448, 1280]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")];
+            tensor<fp16, [32, 1, 448, 1280]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")];
+            tensor<fp16, [32, 1, 1500, 1280]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<int32, [4]> k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")];
+            tensor<fp16, [32, 1, 1500, 1280]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<int32, [4]> v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")];
+            int32 var_180 = const()[name = string("op_180"), val = int32(-1)];
+            tensor<int32, [1]> var_198_axes_0 = const()[name = string("op_198_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133924032)))];
+            tensor<fp16, [1280]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133926656)))];
+            fp16 var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_198_cast_fp16 = layer_norm(axes = var_198_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_198_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_209_to_fp16 = const()[name = string("op_209_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133929280)))];
+            tensor<fp16, [1280]> var_210_to_fp16 = const()[name = string("op_210_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137206144)))];
+            tensor<fp16, [1, ?, 1280]> linear_0_cast_fp16 = linear(bias = var_210_to_fp16, weight = var_209_to_fp16, x = var_198_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137208768)))];
+            tensor<fp16, [1280]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140485632)))];
+            tensor<fp16, [1, ?, 1280]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_213_to_fp16, x = var_198_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140488256)))];
+            tensor<fp16, [1280]> var_218_to_fp16 = const()[name = string("op_218_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143765120)))];
+            tensor<fp16, [1, ?, 1280]> linear_2_cast_fp16 = linear(bias = var_218_to_fp16, weight = var_217_to_fp16, x = var_198_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [3]> var_220_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_220_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_220_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_220_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_220_shape_cast_fp16_to_uint16 = cast(dtype = var_220_shape_cast_fp16_to_uint16_dtype_0, x = var_220_shape_cast_fp16)[name = string("cast_390")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_220_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_389")];
+            int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")];
+            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")];
+            tensor<int32, [1]> expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")];
+            tensor<int32, [1]> concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)];
+            bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")];
+            tensor<int32, [1]> concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)];
+            bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_64_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_64 = read_state(input = k_cache1)[name = string("coreml_update_state_64")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_65_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_65 = read_state(input = v_cache1)[name = string("coreml_update_state_65")];
+            int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)];
+            int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(1280)];
+            int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
+            bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")];
+            tensor<int32, [3]> var_236_begin_0 = const()[name = string("op_236_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_236_end_mask_0 = const()[name = string("op_236_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = concat_10, end_mask = var_236_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_236_cast_fp16")];
+            tensor<int32, [3]> var_239_begin_0 = const()[name = string("op_239_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_239_end_mask_0 = const()[name = string("op_239_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = concat_10, end_mask = var_239_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_239_cast_fp16")];
+            tensor<int32, [4]> concat_12x = const()[name = string("concat_12x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_249_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_249_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_3_cast_fp16 = mul(x = var_249_cast_fp16, y = const_160_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> concat_13x = const()[name = string("concat_13x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_256_cast_fp16 = reshape(shape = concat_13x, x = var_236_cast_fp16)[name = string("op_256_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_5_cast_fp16 = mul(x = var_256_cast_fp16, y = const_161_to_fp16)[name = string("k_5_cast_fp16")];
+            tensor<int32, [4]> concat_14x = const()[name = string("concat_14x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_263_cast_fp16 = reshape(shape = concat_14x, x = var_239_cast_fp16)[name = string("op_263_cast_fp16")];
+            tensor<int32, [4]> var_264 = const()[name = string("op_264"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_257_perm_0 = const()[name = string("transpose_257_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_258_perm_0 = const()[name = string("transpose_258_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_258 = transpose(perm = transpose_258_perm_0, x = k_5_cast_fp16)[name = string("transpose_638")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_257 = transpose(perm = transpose_257_perm_0, x = q_3_cast_fp16)[name = string("transpose_639")];
+            tensor<fp16, [1, 20, ?, ?]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_257, y = transpose_258)[name = string("qk_1_cast_fp16")];
+            int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")];
+            tensor<int32, [2]> var_267_begin_0 = const()[name = string("op_267_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_267_end_mask_0 = const()[name = string("op_267_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 448]> mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor<fp16, [448, 448]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143767744)))];
+            tensor<fp16, [?, 448]> var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = concat_15, end_mask = var_267_end_mask_0, x = mask_to_fp16)[name = string("op_267_cast_fp16")];
+            int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)];
+            int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)];
+            bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")];
+            tensor<int32, [2]> var_268_begin_0 = const()[name = string("op_268_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_268_end_mask_0 = const()[name = string("op_268_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = concat_16, end_mask = var_268_end_mask_0, x = var_267_cast_fp16)[name = string("op_268_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_268_cast_fp16)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_271_cast_fp16 = softmax(axis = var_180, x = qk_3_cast_fp16)[name = string("op_271_cast_fp16")];
+            bool var_273_transpose_x_0 = const()[name = string("op_273_transpose_x_0"), val = bool(false)];
+            bool var_273_transpose_y_0 = const()[name = string("op_273_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_5_cast_fp16 = transpose(perm = var_264, x = var_263_cast_fp16)[name = string("transpose_640")];
+            tensor<fp16, [1, 20, ?, 64]> var_273_cast_fp16 = matmul(transpose_x = var_273_transpose_x_0, transpose_y = var_273_transpose_y_0, x = var_271_cast_fp16, y = v_5_cast_fp16)[name = string("op_273_cast_fp16")];
+            tensor<int32, [4]> var_274 = const()[name = string("op_274"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_17x = const()[name = string("concat_17x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_275_cast_fp16 = transpose(perm = var_274, x = var_273_cast_fp16)[name = string("transpose_637")];
+            tensor<fp16, [1, ?, 1280]> x_7_cast_fp16 = reshape(shape = concat_17x, x = var_275_cast_fp16)[name = string("x_7_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144169216)))];
+            tensor<fp16, [1280]> var_280_to_fp16 = const()[name = string("op_280_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147446080)))];
+            tensor<fp16, [1, ?, 1280]> linear_3_cast_fp16 = linear(bias = var_280_to_fp16, weight = var_279_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")];
+            tensor<int32, [1]> var_287_axes_0 = const()[name = string("op_287_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147448704)))];
+            tensor<fp16, [1280]> blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147451328)))];
+            tensor<fp16, [1, ?, 1280]> var_287_cast_fp16 = layer_norm(axes = var_287_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_287_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_296_to_fp16 = const()[name = string("op_296_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147453952)))];
+            tensor<fp16, [1280]> var_297_to_fp16 = const()[name = string("op_297_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150730816)))];
+            tensor<fp16, [1, ?, 1280]> linear_4_cast_fp16 = linear(bias = var_297_to_fp16, weight = var_296_to_fp16, x = var_287_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor<fp16, [1, 1500, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150733440)))];
+            tensor<fp16, [1, 1500, 1280]> k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_22x = const()[name = string("concat_22x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_317_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_317_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_7_cast_fp16 = mul(x = var_317_cast_fp16, y = const_162_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_323 = const()[name = string("op_323"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_324_cast_fp16 = reshape(shape = var_323, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_324_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_9_cast_fp16 = mul(x = var_324_cast_fp16, y = const_163_to_fp16)[name = string("k_9_cast_fp16")];
+            tensor<int32, [4]> var_330 = const()[name = string("op_330"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_331_cast_fp16 = reshape(shape = var_330, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_331_cast_fp16")];
+            tensor<int32, [4]> var_332 = const()[name = string("op_332"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_259_perm_0 = const()[name = string("transpose_259_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_260_perm_0 = const()[name = string("transpose_260_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_260 = transpose(perm = transpose_260_perm_0, x = k_9_cast_fp16)[name = string("transpose_634")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_259 = transpose(perm = transpose_259_perm_0, x = q_7_cast_fp16)[name = string("transpose_635")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_259, y = transpose_260)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_336_cast_fp16 = softmax(axis = var_180, x = qk_5_cast_fp16)[name = string("op_336_cast_fp16")];
+            bool var_338_transpose_x_0 = const()[name = string("op_338_transpose_x_0"), val = bool(false)];
+            bool var_338_transpose_y_0 = const()[name = string("op_338_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_9_cast_fp16 = transpose(perm = var_332, x = var_331_cast_fp16)[name = string("transpose_636")];
+            tensor<fp16, [1, 20, ?, 64]> var_338_cast_fp16 = matmul(transpose_x = var_338_transpose_x_0, transpose_y = var_338_transpose_y_0, x = var_336_cast_fp16, y = v_9_cast_fp16)[name = string("op_338_cast_fp16")];
+            tensor<int32, [4]> var_339 = const()[name = string("op_339"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_23x = const()[name = string("concat_23x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_340_cast_fp16 = transpose(perm = var_339, x = var_338_cast_fp16)[name = string("transpose_633")];
+            tensor<fp16, [1, ?, 1280]> x_13_cast_fp16 = reshape(shape = concat_23x, x = var_340_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154573504)))];
+            tensor<fp16, [1280]> var_345_to_fp16 = const()[name = string("op_345_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157850368)))];
+            tensor<fp16, [1, ?, 1280]> linear_5_cast_fp16 = linear(bias = var_345_to_fp16, weight = var_344_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")];
+            tensor<int32, [1]> var_352_axes_0 = const()[name = string("op_352_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157852992)))];
+            tensor<fp16, [1280]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157855616)))];
+            tensor<fp16, [1, ?, 1280]> var_352_cast_fp16 = layer_norm(axes = var_352_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_352_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_361_to_fp16 = const()[name = string("op_361_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157858240)))];
+            tensor<fp16, [5120]> var_362_to_fp16 = const()[name = string("op_362_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170965504)))];
+            tensor<fp16, [1, ?, 5120]> linear_6_cast_fp16 = linear(bias = var_362_to_fp16, weight = var_361_to_fp16, x = var_352_cast_fp16)[name = string("linear_6_cast_fp16")];
+            string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170975808)))];
+            tensor<fp16, [1280]> var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184083072)))];
+            tensor<fp16, [1, ?, 1280]> linear_7_cast_fp16 = linear(bias = var_368_to_fp16, weight = var_367_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_64)[name = string("k_cache_5_cast_fp16")];
+            tensor<int32, [4]> v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_65)[name = string("v_cache_5_cast_fp16")];
+            tensor<int32, [4]> k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")];
+            tensor<int32, [4]> v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")];
+            int32 var_391 = const()[name = string("op_391"), val = int32(-1)];
+            tensor<int32, [1]> var_409_axes_0 = const()[name = string("op_409_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184085696)))];
+            tensor<fp16, [1280]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184088320)))];
+            fp16 var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_409_cast_fp16 = layer_norm(axes = var_409_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_409_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184090944)))];
+            tensor<fp16, [1280]> var_421_to_fp16 = const()[name = string("op_421_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187367808)))];
+            tensor<fp16, [1, ?, 1280]> linear_8_cast_fp16 = linear(bias = var_421_to_fp16, weight = var_420_to_fp16, x = var_409_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_424_to_fp16 = const()[name = string("op_424_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187370432)))];
+            tensor<fp16, [1, ?, 1280]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_424_to_fp16, x = var_409_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_428_to_fp16 = const()[name = string("op_428_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190647296)))];
+            tensor<fp16, [1280]> var_429_to_fp16 = const()[name = string("op_429_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193924160)))];
+            tensor<fp16, [1, ?, 1280]> linear_10_cast_fp16 = linear(bias = var_429_to_fp16, weight = var_428_to_fp16, x = var_409_cast_fp16)[name = string("linear_10_cast_fp16")];
+            tensor<int32, [3]> var_431_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_431_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_431_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_431_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_431_shape_cast_fp16_to_uint16 = cast(dtype = var_431_shape_cast_fp16_to_uint16_dtype_0, x = var_431_shape_cast_fp16)[name = string("cast_388")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_431_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_387")];
+            int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")];
+            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")];
+            tensor<int32, [1]> concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor<int32, [1]>([1])];
+            int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
+            bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_64)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_66 = read_state(input = k_cache1)[name = string("coreml_update_state_66")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_65)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_67 = read_state(input = v_cache1)[name = string("coreml_update_state_67")];
+            int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)];
+            int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(1280)];
+            int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)];
+            bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")];
+            tensor<int32, [3]> var_447_begin_0 = const()[name = string("op_447_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_447_end_mask_0 = const()[name = string("op_447_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_447_cast_fp16 = slice_by_index(begin = var_447_begin_0, end = concat_32, end_mask = var_447_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_447_cast_fp16")];
+            tensor<int32, [3]> var_450_begin_0 = const()[name = string("op_450_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_450_end_mask_0 = const()[name = string("op_450_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_450_cast_fp16 = slice_by_index(begin = var_450_begin_0, end = concat_32, end_mask = var_450_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_450_cast_fp16")];
+            tensor<int32, [4]> concat_34x = const()[name = string("concat_34x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_460_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_460_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_11_cast_fp16 = mul(x = var_460_cast_fp16, y = const_164_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> concat_35x = const()[name = string("concat_35x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_467_cast_fp16 = reshape(shape = concat_35x, x = var_447_cast_fp16)[name = string("op_467_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_15_cast_fp16 = mul(x = var_467_cast_fp16, y = const_165_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> concat_36x = const()[name = string("concat_36x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_474_cast_fp16 = reshape(shape = concat_36x, x = var_450_cast_fp16)[name = string("op_474_cast_fp16")];
+            tensor<int32, [4]> var_475 = const()[name = string("op_475"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_261_perm_0 = const()[name = string("transpose_261_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_262_perm_0 = const()[name = string("transpose_262_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_262 = transpose(perm = transpose_262_perm_0, x = k_15_cast_fp16)[name = string("transpose_630")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_261 = transpose(perm = transpose_261_perm_0, x = q_11_cast_fp16)[name = string("transpose_631")];
+            tensor<fp16, [1, 20, ?, ?]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_261, y = transpose_262)[name = string("qk_7_cast_fp16")];
+            int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)];
+            int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)];
+            bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")];
+            tensor<int32, [2]> var_478_begin_0 = const()[name = string("op_478_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_478_end_mask_0 = const()[name = string("op_478_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = concat_37, end_mask = var_478_end_mask_0, x = mask_to_fp16)[name = string("op_478_cast_fp16")];
+            int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)];
+            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
+            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")];
+            tensor<int32, [2]> var_479_begin_0 = const()[name = string("op_479_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_479_end_mask_0 = const()[name = string("op_479_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_479_cast_fp16 = slice_by_index(begin = var_479_begin_0, end = concat_38, end_mask = var_479_end_mask_0, x = var_478_cast_fp16)[name = string("op_479_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_479_cast_fp16)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_482_cast_fp16 = softmax(axis = var_391, x = qk_9_cast_fp16)[name = string("op_482_cast_fp16")];
+            bool var_484_transpose_x_0 = const()[name = string("op_484_transpose_x_0"), val = bool(false)];
+            bool var_484_transpose_y_0 = const()[name = string("op_484_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_15_cast_fp16 = transpose(perm = var_475, x = var_474_cast_fp16)[name = string("transpose_632")];
+            tensor<fp16, [1, 20, ?, 64]> var_484_cast_fp16 = matmul(transpose_x = var_484_transpose_x_0, transpose_y = var_484_transpose_y_0, x = var_482_cast_fp16, y = v_15_cast_fp16)[name = string("op_484_cast_fp16")];
+            tensor<int32, [4]> var_485 = const()[name = string("op_485"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_39x = const()[name = string("concat_39x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_486_cast_fp16 = transpose(perm = var_485, x = var_484_cast_fp16)[name = string("transpose_629")];
+            tensor<fp16, [1, ?, 1280]> x_25_cast_fp16 = reshape(shape = concat_39x, x = var_486_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_490_to_fp16 = const()[name = string("op_490_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193926784)))];
+            tensor<fp16, [1280]> var_491_to_fp16 = const()[name = string("op_491_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197203648)))];
+            tensor<fp16, [1, ?, 1280]> linear_11_cast_fp16 = linear(bias = var_491_to_fp16, weight = var_490_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")];
+            tensor<int32, [1]> var_498_axes_0 = const()[name = string("op_498_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197206272)))];
+            tensor<fp16, [1280]> blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197208896)))];
+            tensor<fp16, [1, ?, 1280]> var_498_cast_fp16 = layer_norm(axes = var_498_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_498_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_507_to_fp16 = const()[name = string("op_507_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197211520)))];
+            tensor<fp16, [1280]> var_508_to_fp16 = const()[name = string("op_508_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200488384)))];
+            tensor<fp16, [1, ?, 1280]> linear_12_cast_fp16 = linear(bias = var_508_to_fp16, weight = var_507_to_fp16, x = var_498_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<int32, [3]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_42 = const()[name = string("concat_42"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_44x = const()[name = string("concat_44x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_528_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_528_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_15_cast_fp16 = mul(x = var_528_cast_fp16, y = const_166_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_534 = const()[name = string("op_534"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_535_cast_fp16 = reshape(shape = var_534, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_535_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_167_to_fp16 = const()[name = string("const_167_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_19_cast_fp16 = mul(x = var_535_cast_fp16, y = const_167_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_541 = const()[name = string("op_541"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_542_cast_fp16 = reshape(shape = var_541, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_542_cast_fp16")];
+            tensor<int32, [4]> var_543 = const()[name = string("op_543"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_263_perm_0 = const()[name = string("transpose_263_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_264_perm_0 = const()[name = string("transpose_264_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_264 = transpose(perm = transpose_264_perm_0, x = k_19_cast_fp16)[name = string("transpose_626")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_263 = transpose(perm = transpose_263_perm_0, x = q_15_cast_fp16)[name = string("transpose_627")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_263, y = transpose_264)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_547_cast_fp16 = softmax(axis = var_391, x = qk_11_cast_fp16)[name = string("op_547_cast_fp16")];
+            bool var_549_transpose_x_0 = const()[name = string("op_549_transpose_x_0"), val = bool(false)];
+            bool var_549_transpose_y_0 = const()[name = string("op_549_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_543, x = var_542_cast_fp16)[name = string("transpose_628")];
+            tensor<fp16, [1, 20, ?, 64]> var_549_cast_fp16 = matmul(transpose_x = var_549_transpose_x_0, transpose_y = var_549_transpose_y_0, x = var_547_cast_fp16, y = v_19_cast_fp16)[name = string("op_549_cast_fp16")];
+            tensor<int32, [4]> var_550 = const()[name = string("op_550"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_45x = const()[name = string("concat_45x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_551_cast_fp16 = transpose(perm = var_550, x = var_549_cast_fp16)[name = string("transpose_625")];
+            tensor<fp16, [1, ?, 1280]> x_31_cast_fp16 = reshape(shape = concat_45x, x = var_551_cast_fp16)[name = string("x_31_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_555_to_fp16 = const()[name = string("op_555_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200491008)))];
+            tensor<fp16, [1280]> var_556_to_fp16 = const()[name = string("op_556_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203767872)))];
+            tensor<fp16, [1, ?, 1280]> linear_13_cast_fp16 = linear(bias = var_556_to_fp16, weight = var_555_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")];
+            tensor<int32, [1]> var_563_axes_0 = const()[name = string("op_563_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203770496)))];
+            tensor<fp16, [1280]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203773120)))];
+            tensor<fp16, [1, ?, 1280]> var_563_cast_fp16 = layer_norm(axes = var_563_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_563_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_572_to_fp16 = const()[name = string("op_572_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203775744)))];
+            tensor<fp16, [5120]> var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216883008)))];
+            tensor<fp16, [1, ?, 5120]> linear_14_cast_fp16 = linear(bias = var_573_to_fp16, weight = var_572_to_fp16, x = var_563_cast_fp16)[name = string("linear_14_cast_fp16")];
+            string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_578_to_fp16 = const()[name = string("op_578_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216893312)))];
+            tensor<fp16, [1280]> var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230000576)))];
+            tensor<fp16, [1, ?, 1280]> linear_15_cast_fp16 = linear(bias = var_579_to_fp16, weight = var_578_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")];
+            tensor<int32, [4]> k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_66)[name = string("k_cache_9_cast_fp16")];
+            tensor<int32, [4]> v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_67)[name = string("v_cache_9_cast_fp16")];
+            tensor<int32, [4]> k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")];
+            tensor<int32, [4]> v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")];
+            int32 var_602 = const()[name = string("op_602"), val = int32(-1)];
+            tensor<int32, [1]> var_620_axes_0 = const()[name = string("op_620_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230003200)))];
+            tensor<fp16, [1280]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230005824)))];
+            fp16 var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_620_cast_fp16 = layer_norm(axes = var_620_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_620_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_631_to_fp16 = const()[name = string("op_631_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230008448)))];
+            tensor<fp16, [1280]> var_632_to_fp16 = const()[name = string("op_632_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233285312)))];
+            tensor<fp16, [1, ?, 1280]> linear_16_cast_fp16 = linear(bias = var_632_to_fp16, weight = var_631_to_fp16, x = var_620_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_635_to_fp16 = const()[name = string("op_635_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233287936)))];
+            tensor<fp16, [1, ?, 1280]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_635_to_fp16, x = var_620_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_639_to_fp16 = const()[name = string("op_639_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236564800)))];
+            tensor<fp16, [1280]> var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239841664)))];
+            tensor<fp16, [1, ?, 1280]> linear_18_cast_fp16 = linear(bias = var_640_to_fp16, weight = var_639_to_fp16, x = var_620_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<int32, [3]> var_642_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_642_shape_cast_fp16")];
+            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
+            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
+            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
+            string var_642_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_642_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_642_shape_cast_fp16_to_uint16 = cast(dtype = var_642_shape_cast_fp16_to_uint16_dtype_0, x = var_642_shape_cast_fp16)[name = string("cast_386")];
+            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_642_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
+            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_385")];
+            int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")];
+            tensor<int32, [1]> expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([2])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")];
+            tensor<int32, [1]> concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)];
+            bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_66)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_68_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_68 = read_state(input = k_cache1)[name = string("coreml_update_state_68")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_67)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_69_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_69 = read_state(input = v_cache1)[name = string("coreml_update_state_69")];
+            int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)];
+            int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(1280)];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")];
+            tensor<int32, [3]> var_658_begin_0 = const()[name = string("op_658_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_658_end_mask_0 = const()[name = string("op_658_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_658_cast_fp16 = slice_by_index(begin = var_658_begin_0, end = concat_54, end_mask = var_658_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_658_cast_fp16")];
+            tensor<int32, [3]> var_661_begin_0 = const()[name = string("op_661_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_661_end_mask_0 = const()[name = string("op_661_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_661_cast_fp16 = slice_by_index(begin = var_661_begin_0, end = concat_54, end_mask = var_661_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_661_cast_fp16")];
+            tensor<int32, [4]> concat_56x = const()[name = string("concat_56x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_671_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_671_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_19_cast_fp16 = mul(x = var_671_cast_fp16, y = const_168_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> concat_57x = const()[name = string("concat_57x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_678_cast_fp16 = reshape(shape = concat_57x, x = var_658_cast_fp16)[name = string("op_678_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_25_cast_fp16 = mul(x = var_678_cast_fp16, y = const_169_to_fp16)[name = string("k_25_cast_fp16")];
+            tensor<int32, [4]> concat_58x = const()[name = string("concat_58x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_685_cast_fp16 = reshape(shape = concat_58x, x = var_661_cast_fp16)[name = string("op_685_cast_fp16")];
+            tensor<int32, [4]> var_686 = const()[name = string("op_686"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_265_perm_0 = const()[name = string("transpose_265_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_266_perm_0 = const()[name = string("transpose_266_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_266 = transpose(perm = transpose_266_perm_0, x = k_25_cast_fp16)[name = string("transpose_622")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_265 = transpose(perm = transpose_265_perm_0, x = q_19_cast_fp16)[name = string("transpose_623")];
+            tensor<fp16, [1, 20, ?, ?]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_265, y = transpose_266)[name = string("qk_13_cast_fp16")];
+            int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)];
+            int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)];
+            bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")];
+            tensor<int32, [2]> var_689_begin_0 = const()[name = string("op_689_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_689_end_mask_0 = const()[name = string("op_689_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_689_cast_fp16 = slice_by_index(begin = var_689_begin_0, end = concat_59, end_mask = var_689_end_mask_0, x = mask_to_fp16)[name = string("op_689_cast_fp16")];
+            int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")];
+            tensor<int32, [2]> var_690_begin_0 = const()[name = string("op_690_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_690_end_mask_0 = const()[name = string("op_690_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_690_cast_fp16 = slice_by_index(begin = var_690_begin_0, end = concat_60, end_mask = var_690_end_mask_0, x = var_689_cast_fp16)[name = string("op_690_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_690_cast_fp16)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_693_cast_fp16 = softmax(axis = var_602, x = qk_15_cast_fp16)[name = string("op_693_cast_fp16")];
+            bool var_695_transpose_x_0 = const()[name = string("op_695_transpose_x_0"), val = bool(false)];
+            bool var_695_transpose_y_0 = const()[name = string("op_695_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_25_cast_fp16 = transpose(perm = var_686, x = var_685_cast_fp16)[name = string("transpose_624")];
+            tensor<fp16, [1, 20, ?, 64]> var_695_cast_fp16 = matmul(transpose_x = var_695_transpose_x_0, transpose_y = var_695_transpose_y_0, x = var_693_cast_fp16, y = v_25_cast_fp16)[name = string("op_695_cast_fp16")];
+            tensor<int32, [4]> var_696 = const()[name = string("op_696"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_61x = const()[name = string("concat_61x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_697_cast_fp16 = transpose(perm = var_696, x = var_695_cast_fp16)[name = string("transpose_621")];
+            tensor<fp16, [1, ?, 1280]> x_43_cast_fp16 = reshape(shape = concat_61x, x = var_697_cast_fp16)[name = string("x_43_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_701_to_fp16 = const()[name = string("op_701_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239844288)))];
+            tensor<fp16, [1280]> var_702_to_fp16 = const()[name = string("op_702_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243121152)))];
+            tensor<fp16, [1, ?, 1280]> linear_19_cast_fp16 = linear(bias = var_702_to_fp16, weight = var_701_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")];
+            tensor<int32, [1]> var_709_axes_0 = const()[name = string("op_709_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243123776)))];
+            tensor<fp16, [1280]> blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243126400)))];
+            tensor<fp16, [1, ?, 1280]> var_709_cast_fp16 = layer_norm(axes = var_709_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_709_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_718_to_fp16 = const()[name = string("op_718_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243129024)))];
+            tensor<fp16, [1280]> var_719_to_fp16 = const()[name = string("op_719_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246405888)))];
+            tensor<fp16, [1, ?, 1280]> linear_20_cast_fp16 = linear(bias = var_719_to_fp16, weight = var_718_to_fp16, x = var_709_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [3]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_63 = const()[name = string("concat_63"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_64 = const()[name = string("concat_64"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_66x = const()[name = string("concat_66x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_739_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_739_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_23_cast_fp16 = mul(x = var_739_cast_fp16, y = const_170_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_745 = const()[name = string("op_745"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_746_cast_fp16 = reshape(shape = var_745, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_746_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_29_cast_fp16 = mul(x = var_746_cast_fp16, y = const_171_to_fp16)[name = string("k_29_cast_fp16")];
+            tensor<int32, [4]> var_752 = const()[name = string("op_752"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_753_cast_fp16 = reshape(shape = var_752, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_753_cast_fp16")];
+            tensor<int32, [4]> var_754 = const()[name = string("op_754"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_267_perm_0 = const()[name = string("transpose_267_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_268_perm_0 = const()[name = string("transpose_268_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_268 = transpose(perm = transpose_268_perm_0, x = k_29_cast_fp16)[name = string("transpose_618")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_267 = transpose(perm = transpose_267_perm_0, x = q_23_cast_fp16)[name = string("transpose_619")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_267, y = transpose_268)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_758_cast_fp16 = softmax(axis = var_602, x = qk_17_cast_fp16)[name = string("op_758_cast_fp16")];
+            bool var_760_transpose_x_0 = const()[name = string("op_760_transpose_x_0"), val = bool(false)];
+            bool var_760_transpose_y_0 = const()[name = string("op_760_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_29_cast_fp16 = transpose(perm = var_754, x = var_753_cast_fp16)[name = string("transpose_620")];
+            tensor<fp16, [1, 20, ?, 64]> var_760_cast_fp16 = matmul(transpose_x = var_760_transpose_x_0, transpose_y = var_760_transpose_y_0, x = var_758_cast_fp16, y = v_29_cast_fp16)[name = string("op_760_cast_fp16")];
+            tensor<int32, [4]> var_761 = const()[name = string("op_761"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_67x = const()[name = string("concat_67x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_762_cast_fp16 = transpose(perm = var_761, x = var_760_cast_fp16)[name = string("transpose_617")];
+            tensor<fp16, [1, ?, 1280]> x_49_cast_fp16 = reshape(shape = concat_67x, x = var_762_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246408512)))];
+            tensor<fp16, [1280]> var_767_to_fp16 = const()[name = string("op_767_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249685376)))];
+            tensor<fp16, [1, ?, 1280]> linear_21_cast_fp16 = linear(bias = var_767_to_fp16, weight = var_766_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")];
+            tensor<int32, [1]> var_774_axes_0 = const()[name = string("op_774_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249688000)))];
+            tensor<fp16, [1280]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249690624)))];
+            tensor<fp16, [1, ?, 1280]> var_774_cast_fp16 = layer_norm(axes = var_774_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_774_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_783_to_fp16 = const()[name = string("op_783_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249693248)))];
+            tensor<fp16, [5120]> var_784_to_fp16 = const()[name = string("op_784_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262800512)))];
+            tensor<fp16, [1, ?, 5120]> linear_22_cast_fp16 = linear(bias = var_784_to_fp16, weight = var_783_to_fp16, x = var_774_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_789_to_fp16 = const()[name = string("op_789_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262810816)))];
+            tensor<fp16, [1280]> var_790_to_fp16 = const()[name = string("op_790_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275918080)))];
+            tensor<fp16, [1, ?, 1280]> linear_23_cast_fp16 = linear(bias = var_790_to_fp16, weight = var_789_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")];
+            tensor<int32, [4]> k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_68)[name = string("k_cache_13_cast_fp16")];
+            tensor<int32, [4]> v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_69)[name = string("v_cache_13_cast_fp16")];
+            tensor<int32, [4]> k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")];
+            tensor<int32, [4]> v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")];
+            int32 var_813 = const()[name = string("op_813"), val = int32(-1)];
+            tensor<int32, [1]> var_831_axes_0 = const()[name = string("op_831_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275920704)))];
+            tensor<fp16, [1280]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275923328)))];
+            fp16 var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_831_cast_fp16 = layer_norm(axes = var_831_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_831_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_842_to_fp16 = const()[name = string("op_842_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275925952)))];
+            tensor<fp16, [1280]> var_843_to_fp16 = const()[name = string("op_843_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279202816)))];
+            tensor<fp16, [1, ?, 1280]> linear_24_cast_fp16 = linear(bias = var_843_to_fp16, weight = var_842_to_fp16, x = var_831_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_846_to_fp16 = const()[name = string("op_846_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279205440)))];
+            tensor<fp16, [1, ?, 1280]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_846_to_fp16, x = var_831_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282482304)))];
+            tensor<fp16, [1280]> var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285759168)))];
+            tensor<fp16, [1, ?, 1280]> linear_26_cast_fp16 = linear(bias = var_851_to_fp16, weight = var_850_to_fp16, x = var_831_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [3]> var_853_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_853_shape_cast_fp16")];
+            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
+            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
+            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
+            string var_853_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_853_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_853_shape_cast_fp16_to_uint16 = cast(dtype = var_853_shape_cast_fp16_to_uint16_dtype_0, x = var_853_shape_cast_fp16)[name = string("cast_384")];
+            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_853_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
+            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_383")];
+            int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")];
+            tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")];
+            tensor<int32, [1]> concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor<int32, [1]>([3])];
+            int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)];
+            bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")];
+            tensor<int32, [1]> concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)];
+            bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_68)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_70_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_70 = read_state(input = k_cache1)[name = string("coreml_update_state_70")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_69)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_71_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_71 = read_state(input = v_cache1)[name = string("coreml_update_state_71")];
+            int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)];
+            int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(1280)];
+            int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)];
+            bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")];
+            tensor<int32, [3]> var_869_begin_0 = const()[name = string("op_869_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_869_end_mask_0 = const()[name = string("op_869_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = concat_76, end_mask = var_869_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_869_cast_fp16")];
+            tensor<int32, [3]> var_872_begin_0 = const()[name = string("op_872_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_872_end_mask_0 = const()[name = string("op_872_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = concat_76, end_mask = var_872_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_872_cast_fp16")];
+            tensor<int32, [4]> concat_78x = const()[name = string("concat_78x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_882_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_882_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_27_cast_fp16 = mul(x = var_882_cast_fp16, y = const_172_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> concat_79x = const()[name = string("concat_79x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_889_cast_fp16 = reshape(shape = concat_79x, x = var_869_cast_fp16)[name = string("op_889_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_35_cast_fp16 = mul(x = var_889_cast_fp16, y = const_173_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> concat_80x = const()[name = string("concat_80x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_896_cast_fp16 = reshape(shape = concat_80x, x = var_872_cast_fp16)[name = string("op_896_cast_fp16")];
+            tensor<int32, [4]> var_897 = const()[name = string("op_897"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_269_perm_0 = const()[name = string("transpose_269_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_270_perm_0 = const()[name = string("transpose_270_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_270 = transpose(perm = transpose_270_perm_0, x = k_35_cast_fp16)[name = string("transpose_614")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_269 = transpose(perm = transpose_269_perm_0, x = q_27_cast_fp16)[name = string("transpose_615")];
+            tensor<fp16, [1, 20, ?, ?]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_269, y = transpose_270)[name = string("qk_19_cast_fp16")];
+            int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)];
+            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
+            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")];
+            tensor<int32, [2]> var_900_begin_0 = const()[name = string("op_900_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_900_end_mask_0 = const()[name = string("op_900_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_900_cast_fp16 = slice_by_index(begin = var_900_begin_0, end = concat_81, end_mask = var_900_end_mask_0, x = mask_to_fp16)[name = string("op_900_cast_fp16")];
+            int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)];
+            int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)];
+            bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")];
+            tensor<int32, [2]> var_901_begin_0 = const()[name = string("op_901_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_901_end_mask_0 = const()[name = string("op_901_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_901_cast_fp16 = slice_by_index(begin = var_901_begin_0, end = concat_82, end_mask = var_901_end_mask_0, x = var_900_cast_fp16)[name = string("op_901_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_901_cast_fp16)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_904_cast_fp16 = softmax(axis = var_813, x = qk_21_cast_fp16)[name = string("op_904_cast_fp16")];
+            bool var_906_transpose_x_0 = const()[name = string("op_906_transpose_x_0"), val = bool(false)];
+            bool var_906_transpose_y_0 = const()[name = string("op_906_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_35_cast_fp16 = transpose(perm = var_897, x = var_896_cast_fp16)[name = string("transpose_616")];
+            tensor<fp16, [1, 20, ?, 64]> var_906_cast_fp16 = matmul(transpose_x = var_906_transpose_x_0, transpose_y = var_906_transpose_y_0, x = var_904_cast_fp16, y = v_35_cast_fp16)[name = string("op_906_cast_fp16")];
+            tensor<int32, [4]> var_907 = const()[name = string("op_907"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_83x = const()[name = string("concat_83x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_908_cast_fp16 = transpose(perm = var_907, x = var_906_cast_fp16)[name = string("transpose_613")];
+            tensor<fp16, [1, ?, 1280]> x_61_cast_fp16 = reshape(shape = concat_83x, x = var_908_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285761792)))];
+            tensor<fp16, [1280]> var_913_to_fp16 = const()[name = string("op_913_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289038656)))];
+            tensor<fp16, [1, ?, 1280]> linear_27_cast_fp16 = linear(bias = var_913_to_fp16, weight = var_912_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")];
+            tensor<int32, [1]> var_920_axes_0 = const()[name = string("op_920_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289041280)))];
+            tensor<fp16, [1280]> blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289043904)))];
+            tensor<fp16, [1, ?, 1280]> var_920_cast_fp16 = layer_norm(axes = var_920_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_920_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_929_to_fp16 = const()[name = string("op_929_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289046528)))];
+            tensor<fp16, [1280]> var_930_to_fp16 = const()[name = string("op_930_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292323392)))];
+            tensor<fp16, [1, ?, 1280]> linear_28_cast_fp16 = linear(bias = var_930_to_fp16, weight = var_929_to_fp16, x = var_920_cast_fp16)[name = string("linear_28_cast_fp16")];
+            tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_87 = const()[name = string("concat_87"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_88x = const()[name = string("concat_88x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_950_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_950_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_31_cast_fp16 = mul(x = var_950_cast_fp16, y = const_174_to_fp16)[name = string("q_31_cast_fp16")];
+            tensor<int32, [4]> var_956 = const()[name = string("op_956"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_957_cast_fp16 = reshape(shape = var_956, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_957_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_39_cast_fp16 = mul(x = var_957_cast_fp16, y = const_175_to_fp16)[name = string("k_39_cast_fp16")];
+            tensor<int32, [4]> var_963 = const()[name = string("op_963"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_964_cast_fp16 = reshape(shape = var_963, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_964_cast_fp16")];
+            tensor<int32, [4]> var_965 = const()[name = string("op_965"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)];
+            bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_271_perm_0 = const()[name = string("transpose_271_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_272_perm_0 = const()[name = string("transpose_272_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_272 = transpose(perm = transpose_272_perm_0, x = k_39_cast_fp16)[name = string("transpose_610")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_271 = transpose(perm = transpose_271_perm_0, x = q_31_cast_fp16)[name = string("transpose_611")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_271, y = transpose_272)[name = string("qk_23_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_969_cast_fp16 = softmax(axis = var_813, x = qk_23_cast_fp16)[name = string("op_969_cast_fp16")];
+            bool var_971_transpose_x_0 = const()[name = string("op_971_transpose_x_0"), val = bool(false)];
+            bool var_971_transpose_y_0 = const()[name = string("op_971_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_965, x = var_964_cast_fp16)[name = string("transpose_612")];
+            tensor<fp16, [1, 20, ?, 64]> var_971_cast_fp16 = matmul(transpose_x = var_971_transpose_x_0, transpose_y = var_971_transpose_y_0, x = var_969_cast_fp16, y = v_39_cast_fp16)[name = string("op_971_cast_fp16")];
+            tensor<int32, [4]> var_972 = const()[name = string("op_972"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_89x = const()[name = string("concat_89x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_973_cast_fp16 = transpose(perm = var_972, x = var_971_cast_fp16)[name = string("transpose_609")];
+            tensor<fp16, [1, ?, 1280]> x_67_cast_fp16 = reshape(shape = concat_89x, x = var_973_cast_fp16)[name = string("x_67_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_977_to_fp16 = const()[name = string("op_977_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292326016)))];
+            tensor<fp16, [1280]> var_978_to_fp16 = const()[name = string("op_978_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295602880)))];
+            tensor<fp16, [1, ?, 1280]> linear_29_cast_fp16 = linear(bias = var_978_to_fp16, weight = var_977_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")];
+            tensor<int32, [1]> var_985_axes_0 = const()[name = string("op_985_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295605504)))];
+            tensor<fp16, [1280]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295608128)))];
+            tensor<fp16, [1, ?, 1280]> var_985_cast_fp16 = layer_norm(axes = var_985_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_985_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_994_to_fp16 = const()[name = string("op_994_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295610752)))];
+            tensor<fp16, [5120]> var_995_to_fp16 = const()[name = string("op_995_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308718016)))];
+            tensor<fp16, [1, ?, 5120]> linear_30_cast_fp16 = linear(bias = var_995_to_fp16, weight = var_994_to_fp16, x = var_985_cast_fp16)[name = string("linear_30_cast_fp16")];
+            string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1000_to_fp16 = const()[name = string("op_1000_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308728320)))];
+            tensor<fp16, [1280]> var_1001_to_fp16 = const()[name = string("op_1001_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321835584)))];
+            tensor<fp16, [1, ?, 1280]> linear_31_cast_fp16 = linear(bias = var_1001_to_fp16, weight = var_1000_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")];
+            tensor<int32, [4]> k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_70)[name = string("k_cache_17_cast_fp16")];
+            tensor<int32, [4]> v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_71)[name = string("v_cache_17_cast_fp16")];
+            tensor<int32, [4]> k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")];
+            tensor<int32, [4]> v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")];
+            int32 var_1024 = const()[name = string("op_1024"), val = int32(-1)];
+            tensor<int32, [1]> var_1042_axes_0 = const()[name = string("op_1042_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321838208)))];
+            tensor<fp16, [1280]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321840832)))];
+            fp16 var_1030_to_fp16 = const()[name = string("op_1030_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1042_cast_fp16 = layer_norm(axes = var_1042_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_1042_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1053_to_fp16 = const()[name = string("op_1053_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321843456)))];
+            tensor<fp16, [1280]> var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325120320)))];
+            tensor<fp16, [1, ?, 1280]> linear_32_cast_fp16 = linear(bias = var_1054_to_fp16, weight = var_1053_to_fp16, x = var_1042_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1057_to_fp16 = const()[name = string("op_1057_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325122944)))];
+            tensor<fp16, [1, ?, 1280]> linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1057_to_fp16, x = var_1042_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328399808)))];
+            tensor<fp16, [1280]> var_1062_to_fp16 = const()[name = string("op_1062_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331676672)))];
+            tensor<fp16, [1, ?, 1280]> linear_34_cast_fp16 = linear(bias = var_1062_to_fp16, weight = var_1061_to_fp16, x = var_1042_cast_fp16)[name = string("linear_34_cast_fp16")];
+            tensor<int32, [3]> var_1064_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_1064_shape_cast_fp16")];
+            int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)];
+            int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)];
+            bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)];
+            string var_1064_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1064_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1064_shape_cast_fp16_to_uint16 = cast(dtype = var_1064_shape_cast_fp16_to_uint16_dtype_0, x = var_1064_shape_cast_fp16)[name = string("cast_382")];
+            uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_1064_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")];
+            string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_381")];
+            int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")];
+            tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")];
+            tensor<int32, [1]> concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor<int32, [1]>([4])];
+            int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)];
+            bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")];
+            tensor<int32, [1]> concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
+            bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_70)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_72_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_72 = read_state(input = k_cache1)[name = string("coreml_update_state_72")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_71)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_73_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_73 = read_state(input = v_cache1)[name = string("coreml_update_state_73")];
+            int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)];
+            int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(1280)];
+            int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)];
+            bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")];
+            tensor<int32, [3]> var_1080_begin_0 = const()[name = string("op_1080_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1080_end_mask_0 = const()[name = string("op_1080_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1080_cast_fp16 = slice_by_index(begin = var_1080_begin_0, end = concat_98, end_mask = var_1080_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1080_cast_fp16")];
+            tensor<int32, [3]> var_1083_begin_0 = const()[name = string("op_1083_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1083_end_mask_0 = const()[name = string("op_1083_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1083_cast_fp16 = slice_by_index(begin = var_1083_begin_0, end = concat_98, end_mask = var_1083_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1083_cast_fp16")];
+            tensor<int32, [4]> concat_100x = const()[name = string("concat_100x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1093_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1093_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_35_cast_fp16 = mul(x = var_1093_cast_fp16, y = const_176_to_fp16)[name = string("q_35_cast_fp16")];
+            tensor<int32, [4]> concat_101x = const()[name = string("concat_101x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1100_cast_fp16 = reshape(shape = concat_101x, x = var_1080_cast_fp16)[name = string("op_1100_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_45_cast_fp16 = mul(x = var_1100_cast_fp16, y = const_177_to_fp16)[name = string("k_45_cast_fp16")];
+            tensor<int32, [4]> concat_102x = const()[name = string("concat_102x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1107_cast_fp16 = reshape(shape = concat_102x, x = var_1083_cast_fp16)[name = string("op_1107_cast_fp16")];
+            tensor<int32, [4]> var_1108 = const()[name = string("op_1108"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)];
+            bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_273_perm_0 = const()[name = string("transpose_273_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_274_perm_0 = const()[name = string("transpose_274_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_274 = transpose(perm = transpose_274_perm_0, x = k_45_cast_fp16)[name = string("transpose_606")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_273 = transpose(perm = transpose_273_perm_0, x = q_35_cast_fp16)[name = string("transpose_607")];
+            tensor<fp16, [1, 20, ?, ?]> qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_273, y = transpose_274)[name = string("qk_25_cast_fp16")];
+            int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)];
+            int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)];
+            bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")];
+            tensor<int32, [2]> var_1111_begin_0 = const()[name = string("op_1111_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1111_end_mask_0 = const()[name = string("op_1111_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1111_cast_fp16 = slice_by_index(begin = var_1111_begin_0, end = concat_103, end_mask = var_1111_end_mask_0, x = mask_to_fp16)[name = string("op_1111_cast_fp16")];
+            int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)];
+            int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)];
+            bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")];
+            tensor<int32, [2]> var_1112_begin_0 = const()[name = string("op_1112_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1112_end_mask_0 = const()[name = string("op_1112_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = concat_104, end_mask = var_1112_end_mask_0, x = var_1111_cast_fp16)[name = string("op_1112_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1112_cast_fp16)[name = string("qk_27_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1115_cast_fp16 = softmax(axis = var_1024, x = qk_27_cast_fp16)[name = string("op_1115_cast_fp16")];
+            bool var_1117_transpose_x_0 = const()[name = string("op_1117_transpose_x_0"), val = bool(false)];
+            bool var_1117_transpose_y_0 = const()[name = string("op_1117_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_45_cast_fp16 = transpose(perm = var_1108, x = var_1107_cast_fp16)[name = string("transpose_608")];
+            tensor<fp16, [1, 20, ?, 64]> var_1117_cast_fp16 = matmul(transpose_x = var_1117_transpose_x_0, transpose_y = var_1117_transpose_y_0, x = var_1115_cast_fp16, y = v_45_cast_fp16)[name = string("op_1117_cast_fp16")];
+            tensor<int32, [4]> var_1118 = const()[name = string("op_1118"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_105x = const()[name = string("concat_105x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1119_cast_fp16 = transpose(perm = var_1118, x = var_1117_cast_fp16)[name = string("transpose_605")];
+            tensor<fp16, [1, ?, 1280]> x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1119_cast_fp16)[name = string("x_79_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1123_to_fp16 = const()[name = string("op_1123_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331679296)))];
+            tensor<fp16, [1280]> var_1124_to_fp16 = const()[name = string("op_1124_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334956160)))];
+            tensor<fp16, [1, ?, 1280]> linear_35_cast_fp16 = linear(bias = var_1124_to_fp16, weight = var_1123_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")];
+            tensor<int32, [1]> var_1131_axes_0 = const()[name = string("op_1131_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334958784)))];
+            tensor<fp16, [1280]> blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334961408)))];
+            tensor<fp16, [1, ?, 1280]> var_1131_cast_fp16 = layer_norm(axes = var_1131_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1131_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1140_to_fp16 = const()[name = string("op_1140_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334964032)))];
+            tensor<fp16, [1280]> var_1141_to_fp16 = const()[name = string("op_1141_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338240896)))];
+            tensor<fp16, [1, ?, 1280]> linear_36_cast_fp16 = linear(bias = var_1141_to_fp16, weight = var_1140_to_fp16, x = var_1131_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<int32, [3]> concat_106 = const()[name = string("concat_106"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_108 = const()[name = string("concat_108"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_109 = const()[name = string("concat_109"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_110x = const()[name = string("concat_110x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1161_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1161_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_39_cast_fp16 = mul(x = var_1161_cast_fp16, y = const_178_to_fp16)[name = string("q_39_cast_fp16")];
+            tensor<int32, [4]> var_1167 = const()[name = string("op_1167"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1168_cast_fp16 = reshape(shape = var_1167, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1168_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_49_cast_fp16 = mul(x = var_1168_cast_fp16, y = const_179_to_fp16)[name = string("k_49_cast_fp16")];
+            tensor<int32, [4]> var_1174 = const()[name = string("op_1174"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1175_cast_fp16 = reshape(shape = var_1174, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1175_cast_fp16")];
+            tensor<int32, [4]> var_1176 = const()[name = string("op_1176"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)];
+            bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_275_perm_0 = const()[name = string("transpose_275_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_276_perm_0 = const()[name = string("transpose_276_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_276 = transpose(perm = transpose_276_perm_0, x = k_49_cast_fp16)[name = string("transpose_602")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_275 = transpose(perm = transpose_275_perm_0, x = q_39_cast_fp16)[name = string("transpose_603")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_275, y = transpose_276)[name = string("qk_29_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_1180_cast_fp16 = softmax(axis = var_1024, x = qk_29_cast_fp16)[name = string("op_1180_cast_fp16")];
+            bool var_1182_transpose_x_0 = const()[name = string("op_1182_transpose_x_0"), val = bool(false)];
+            bool var_1182_transpose_y_0 = const()[name = string("op_1182_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_49_cast_fp16 = transpose(perm = var_1176, x = var_1175_cast_fp16)[name = string("transpose_604")];
+            tensor<fp16, [1, 20, ?, 64]> var_1182_cast_fp16 = matmul(transpose_x = var_1182_transpose_x_0, transpose_y = var_1182_transpose_y_0, x = var_1180_cast_fp16, y = v_49_cast_fp16)[name = string("op_1182_cast_fp16")];
+            tensor<int32, [4]> var_1183 = const()[name = string("op_1183"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_111x = const()[name = string("concat_111x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1184_cast_fp16 = transpose(perm = var_1183, x = var_1182_cast_fp16)[name = string("transpose_601")];
+            tensor<fp16, [1, ?, 1280]> x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1184_cast_fp16)[name = string("x_85_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338243520)))];
+            tensor<fp16, [1280]> var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341520384)))];
+            tensor<fp16, [1, ?, 1280]> linear_37_cast_fp16 = linear(bias = var_1189_to_fp16, weight = var_1188_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")];
+            tensor<int32, [1]> var_1196_axes_0 = const()[name = string("op_1196_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341523008)))];
+            tensor<fp16, [1280]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341525632)))];
+            tensor<fp16, [1, ?, 1280]> var_1196_cast_fp16 = layer_norm(axes = var_1196_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1196_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1205_to_fp16 = const()[name = string("op_1205_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341528256)))];
+            tensor<fp16, [5120]> var_1206_to_fp16 = const()[name = string("op_1206_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354635520)))];
+            tensor<fp16, [1, ?, 5120]> linear_38_cast_fp16 = linear(bias = var_1206_to_fp16, weight = var_1205_to_fp16, x = var_1196_cast_fp16)[name = string("linear_38_cast_fp16")];
+            string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354645824)))];
+            tensor<fp16, [1280]> var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367753088)))];
+            tensor<fp16, [1, ?, 1280]> linear_39_cast_fp16 = linear(bias = var_1212_to_fp16, weight = var_1211_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")];
+            tensor<int32, [4]> k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_72)[name = string("k_cache_21_cast_fp16")];
+            tensor<int32, [4]> v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_73)[name = string("v_cache_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")];
+            tensor<int32, [4]> v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")];
+            int32 var_1235 = const()[name = string("op_1235"), val = int32(-1)];
+            tensor<int32, [1]> var_1253_axes_0 = const()[name = string("op_1253_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367755712)))];
+            tensor<fp16, [1280]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367758336)))];
+            fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1253_cast_fp16 = layer_norm(axes = var_1253_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1253_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1264_to_fp16 = const()[name = string("op_1264_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367760960)))];
+            tensor<fp16, [1280]> var_1265_to_fp16 = const()[name = string("op_1265_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371037824)))];
+            tensor<fp16, [1, ?, 1280]> linear_40_cast_fp16 = linear(bias = var_1265_to_fp16, weight = var_1264_to_fp16, x = var_1253_cast_fp16)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1268_to_fp16 = const()[name = string("op_1268_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371040448)))];
+            tensor<fp16, [1, ?, 1280]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1268_to_fp16, x = var_1253_cast_fp16)[name = string("linear_41_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1272_to_fp16 = const()[name = string("op_1272_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374317312)))];
+            tensor<fp16, [1280]> var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377594176)))];
+            tensor<fp16, [1, ?, 1280]> linear_42_cast_fp16 = linear(bias = var_1273_to_fp16, weight = var_1272_to_fp16, x = var_1253_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<int32, [3]> var_1275_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1275_shape_cast_fp16")];
+            int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)];
+            int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)];
+            bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)];
+            string var_1275_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1275_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1275_shape_cast_fp16_to_uint16 = cast(dtype = var_1275_shape_cast_fp16_to_uint16_dtype_0, x = var_1275_shape_cast_fp16)[name = string("cast_380")];
+            uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1275_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")];
+            string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_379")];
+            int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")];
+            tensor<int32, [1]> expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")];
+            tensor<int32, [1]> concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor<int32, [1]>([5])];
+            int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
+            bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")];
+            tensor<int32, [1]> concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)];
+            bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_72)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_74_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_74 = read_state(input = k_cache1)[name = string("coreml_update_state_74")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_73)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_75_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_75 = read_state(input = v_cache1)[name = string("coreml_update_state_75")];
+            int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)];
+            int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(1280)];
+            int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)];
+            bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")];
+            tensor<int32, [3]> var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = concat_120, end_mask = var_1291_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1291_cast_fp16")];
+            tensor<int32, [3]> var_1294_begin_0 = const()[name = string("op_1294_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1294_end_mask_0 = const()[name = string("op_1294_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1294_cast_fp16 = slice_by_index(begin = var_1294_begin_0, end = concat_120, end_mask = var_1294_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1294_cast_fp16")];
+            tensor<int32, [4]> concat_122x = const()[name = string("concat_122x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1304_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1304_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_43_cast_fp16 = mul(x = var_1304_cast_fp16, y = const_180_to_fp16)[name = string("q_43_cast_fp16")];
+            tensor<int32, [4]> concat_123x = const()[name = string("concat_123x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1311_cast_fp16 = reshape(shape = concat_123x, x = var_1291_cast_fp16)[name = string("op_1311_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_55_cast_fp16 = mul(x = var_1311_cast_fp16, y = const_181_to_fp16)[name = string("k_55_cast_fp16")];
+            tensor<int32, [4]> concat_124x = const()[name = string("concat_124x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1318_cast_fp16 = reshape(shape = concat_124x, x = var_1294_cast_fp16)[name = string("op_1318_cast_fp16")];
+            tensor<int32, [4]> var_1319 = const()[name = string("op_1319"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)];
+            bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_277_perm_0 = const()[name = string("transpose_277_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_278_perm_0 = const()[name = string("transpose_278_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_278 = transpose(perm = transpose_278_perm_0, x = k_55_cast_fp16)[name = string("transpose_598")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_277 = transpose(perm = transpose_277_perm_0, x = q_43_cast_fp16)[name = string("transpose_599")];
+            tensor<fp16, [1, 20, ?, ?]> qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_277, y = transpose_278)[name = string("qk_31_cast_fp16")];
+            int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)];
+            int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)];
+            bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")];
+            tensor<int32, [2]> var_1322_begin_0 = const()[name = string("op_1322_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1322_end_mask_0 = const()[name = string("op_1322_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1322_cast_fp16 = slice_by_index(begin = var_1322_begin_0, end = concat_125, end_mask = var_1322_end_mask_0, x = mask_to_fp16)[name = string("op_1322_cast_fp16")];
+            int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)];
+            int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)];
+            bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")];
+            tensor<int32, [2]> var_1323_begin_0 = const()[name = string("op_1323_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1323_end_mask_0 = const()[name = string("op_1323_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1323_cast_fp16 = slice_by_index(begin = var_1323_begin_0, end = concat_126, end_mask = var_1323_end_mask_0, x = var_1322_cast_fp16)[name = string("op_1323_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1323_cast_fp16)[name = string("qk_33_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1326_cast_fp16 = softmax(axis = var_1235, x = qk_33_cast_fp16)[name = string("op_1326_cast_fp16")];
+            bool var_1328_transpose_x_0 = const()[name = string("op_1328_transpose_x_0"), val = bool(false)];
+            bool var_1328_transpose_y_0 = const()[name = string("op_1328_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_55_cast_fp16 = transpose(perm = var_1319, x = var_1318_cast_fp16)[name = string("transpose_600")];
+            tensor<fp16, [1, 20, ?, 64]> var_1328_cast_fp16 = matmul(transpose_x = var_1328_transpose_x_0, transpose_y = var_1328_transpose_y_0, x = var_1326_cast_fp16, y = v_55_cast_fp16)[name = string("op_1328_cast_fp16")];
+            tensor<int32, [4]> var_1329 = const()[name = string("op_1329"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_127x = const()[name = string("concat_127x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1330_cast_fp16 = transpose(perm = var_1329, x = var_1328_cast_fp16)[name = string("transpose_597")];
+            tensor<fp16, [1, ?, 1280]> x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1330_cast_fp16)[name = string("x_97_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1334_to_fp16 = const()[name = string("op_1334_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377596800)))];
+            tensor<fp16, [1280]> var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380873664)))];
+            tensor<fp16, [1, ?, 1280]> linear_43_cast_fp16 = linear(bias = var_1335_to_fp16, weight = var_1334_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")];
+            tensor<int32, [1]> var_1342_axes_0 = const()[name = string("op_1342_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380876288)))];
+            tensor<fp16, [1280]> blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380878912)))];
+            tensor<fp16, [1, ?, 1280]> var_1342_cast_fp16 = layer_norm(axes = var_1342_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1342_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1351_to_fp16 = const()[name = string("op_1351_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380881536)))];
+            tensor<fp16, [1280]> var_1352_to_fp16 = const()[name = string("op_1352_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384158400)))];
+            tensor<fp16, [1, ?, 1280]> linear_44_cast_fp16 = linear(bias = var_1352_to_fp16, weight = var_1351_to_fp16, x = var_1342_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [3]> concat_128 = const()[name = string("concat_128"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_129 = const()[name = string("concat_129"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_130 = const()[name = string("concat_130"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_131 = const()[name = string("concat_131"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_132x = const()[name = string("concat_132x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1372_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1372_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_47_cast_fp16 = mul(x = var_1372_cast_fp16, y = const_182_to_fp16)[name = string("q_47_cast_fp16")];
+            tensor<int32, [4]> var_1378 = const()[name = string("op_1378"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1379_cast_fp16 = reshape(shape = var_1378, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1379_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_59_cast_fp16 = mul(x = var_1379_cast_fp16, y = const_183_to_fp16)[name = string("k_59_cast_fp16")];
+            tensor<int32, [4]> var_1385 = const()[name = string("op_1385"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1386_cast_fp16 = reshape(shape = var_1385, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1386_cast_fp16")];
+            tensor<int32, [4]> var_1387 = const()[name = string("op_1387"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)];
+            bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_279_perm_0 = const()[name = string("transpose_279_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_280_perm_0 = const()[name = string("transpose_280_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_280 = transpose(perm = transpose_280_perm_0, x = k_59_cast_fp16)[name = string("transpose_594")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_279 = transpose(perm = transpose_279_perm_0, x = q_47_cast_fp16)[name = string("transpose_595")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_279, y = transpose_280)[name = string("qk_35_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_1391_cast_fp16 = softmax(axis = var_1235, x = qk_35_cast_fp16)[name = string("op_1391_cast_fp16")];
+            bool var_1393_transpose_x_0 = const()[name = string("op_1393_transpose_x_0"), val = bool(false)];
+            bool var_1393_transpose_y_0 = const()[name = string("op_1393_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_59_cast_fp16 = transpose(perm = var_1387, x = var_1386_cast_fp16)[name = string("transpose_596")];
+            tensor<fp16, [1, 20, ?, 64]> var_1393_cast_fp16 = matmul(transpose_x = var_1393_transpose_x_0, transpose_y = var_1393_transpose_y_0, x = var_1391_cast_fp16, y = v_59_cast_fp16)[name = string("op_1393_cast_fp16")];
+            tensor<int32, [4]> var_1394 = const()[name = string("op_1394"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_133x = const()[name = string("concat_133x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1395_cast_fp16 = transpose(perm = var_1394, x = var_1393_cast_fp16)[name = string("transpose_593")];
+            tensor<fp16, [1, ?, 1280]> x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1395_cast_fp16)[name = string("x_103_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1399_to_fp16 = const()[name = string("op_1399_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384161024)))];
+            tensor<fp16, [1280]> var_1400_to_fp16 = const()[name = string("op_1400_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387437888)))];
+            tensor<fp16, [1, ?, 1280]> linear_45_cast_fp16 = linear(bias = var_1400_to_fp16, weight = var_1399_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")];
+            tensor<int32, [1]> var_1407_axes_0 = const()[name = string("op_1407_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387440512)))];
+            tensor<fp16, [1280]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387443136)))];
+            tensor<fp16, [1, ?, 1280]> var_1407_cast_fp16 = layer_norm(axes = var_1407_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1407_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1416_to_fp16 = const()[name = string("op_1416_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387445760)))];
+            tensor<fp16, [5120]> var_1417_to_fp16 = const()[name = string("op_1417_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400553024)))];
+            tensor<fp16, [1, ?, 5120]> linear_46_cast_fp16 = linear(bias = var_1417_to_fp16, weight = var_1416_to_fp16, x = var_1407_cast_fp16)[name = string("linear_46_cast_fp16")];
+            string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1422_to_fp16 = const()[name = string("op_1422_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400563328)))];
+            tensor<fp16, [1280]> var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413670592)))];
+            tensor<fp16, [1, ?, 1280]> linear_47_cast_fp16 = linear(bias = var_1423_to_fp16, weight = var_1422_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")];
+            tensor<int32, [4]> k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor<int32, [4]>([7, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_74)[name = string("k_cache_25_cast_fp16")];
+            tensor<int32, [4]> v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor<int32, [4]>([7, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_75)[name = string("v_cache_25_cast_fp16")];
+            tensor<int32, [4]> k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor<int32, [4]>([7, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")];
+            tensor<int32, [4]> v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor<int32, [4]>([7, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")];
+            int32 var_1446 = const()[name = string("op_1446"), val = int32(-1)];
+            tensor<int32, [1]> var_1464_axes_0 = const()[name = string("op_1464_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413673216)))];
+            tensor<fp16, [1280]> blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413675840)))];
+            fp16 var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1464_cast_fp16 = layer_norm(axes = var_1464_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1464_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1475_to_fp16 = const()[name = string("op_1475_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413678464)))];
+            tensor<fp16, [1280]> var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416955328)))];
+            tensor<fp16, [1, ?, 1280]> linear_48_cast_fp16 = linear(bias = var_1476_to_fp16, weight = var_1475_to_fp16, x = var_1464_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1479_to_fp16 = const()[name = string("op_1479_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416957952)))];
+            tensor<fp16, [1, ?, 1280]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1479_to_fp16, x = var_1464_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420234816)))];
+            tensor<fp16, [1280]> var_1484_to_fp16 = const()[name = string("op_1484_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423511680)))];
+            tensor<fp16, [1, ?, 1280]> linear_50_cast_fp16 = linear(bias = var_1484_to_fp16, weight = var_1483_to_fp16, x = var_1464_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [3]> var_1486_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1486_shape_cast_fp16")];
+            int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)];
+            int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)];
+            bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)];
+            string var_1486_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1486_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1486_shape_cast_fp16_to_uint16 = cast(dtype = var_1486_shape_cast_fp16_to_uint16_dtype_0, x = var_1486_shape_cast_fp16)[name = string("cast_378")];
+            uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1486_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")];
+            string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_377")];
+            int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")];
+            tensor<int32, [1]> expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")];
+            tensor<int32, [1]> concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor<int32, [1]>([6])];
+            int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)];
+            bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")];
+            tensor<int32, [1]> concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)];
+            bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_74)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_76_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_76 = read_state(input = k_cache1)[name = string("coreml_update_state_76")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_75)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_77_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_77 = read_state(input = v_cache1)[name = string("coreml_update_state_77")];
+            int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)];
+            int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(1280)];
+            int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)];
+            bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")];
+            tensor<int32, [3]> var_1502_begin_0 = const()[name = string("op_1502_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1502_end_mask_0 = const()[name = string("op_1502_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = concat_142, end_mask = var_1502_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1502_cast_fp16")];
+            tensor<int32, [3]> var_1505_begin_0 = const()[name = string("op_1505_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1505_end_mask_0 = const()[name = string("op_1505_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1505_cast_fp16 = slice_by_index(begin = var_1505_begin_0, end = concat_142, end_mask = var_1505_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1505_cast_fp16")];
+            tensor<int32, [4]> concat_144x = const()[name = string("concat_144x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1515_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1515_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_51_cast_fp16 = mul(x = var_1515_cast_fp16, y = const_184_to_fp16)[name = string("q_51_cast_fp16")];
+            tensor<int32, [4]> concat_145x = const()[name = string("concat_145x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1522_cast_fp16 = reshape(shape = concat_145x, x = var_1502_cast_fp16)[name = string("op_1522_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_65_cast_fp16 = mul(x = var_1522_cast_fp16, y = const_185_to_fp16)[name = string("k_65_cast_fp16")];
+            tensor<int32, [4]> concat_146x = const()[name = string("concat_146x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1529_cast_fp16 = reshape(shape = concat_146x, x = var_1505_cast_fp16)[name = string("op_1529_cast_fp16")];
+            tensor<int32, [4]> var_1530 = const()[name = string("op_1530"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)];
+            bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_281_perm_0 = const()[name = string("transpose_281_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_282_perm_0 = const()[name = string("transpose_282_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_282 = transpose(perm = transpose_282_perm_0, x = k_65_cast_fp16)[name = string("transpose_590")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_281 = transpose(perm = transpose_281_perm_0, x = q_51_cast_fp16)[name = string("transpose_591")];
+            tensor<fp16, [1, 20, ?, ?]> qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_281, y = transpose_282)[name = string("qk_37_cast_fp16")];
+            int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)];
+            int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)];
+            bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")];
+            tensor<int32, [2]> var_1533_begin_0 = const()[name = string("op_1533_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1533_end_mask_0 = const()[name = string("op_1533_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1533_cast_fp16 = slice_by_index(begin = var_1533_begin_0, end = concat_147, end_mask = var_1533_end_mask_0, x = mask_to_fp16)[name = string("op_1533_cast_fp16")];
+            int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)];
+            int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)];
+            bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")];
+            tensor<int32, [2]> var_1534_begin_0 = const()[name = string("op_1534_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1534_end_mask_0 = const()[name = string("op_1534_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1534_cast_fp16 = slice_by_index(begin = var_1534_begin_0, end = concat_148, end_mask = var_1534_end_mask_0, x = var_1533_cast_fp16)[name = string("op_1534_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1534_cast_fp16)[name = string("qk_39_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1537_cast_fp16 = softmax(axis = var_1446, x = qk_39_cast_fp16)[name = string("op_1537_cast_fp16")];
+            bool var_1539_transpose_x_0 = const()[name = string("op_1539_transpose_x_0"), val = bool(false)];
+            bool var_1539_transpose_y_0 = const()[name = string("op_1539_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_65_cast_fp16 = transpose(perm = var_1530, x = var_1529_cast_fp16)[name = string("transpose_592")];
+            tensor<fp16, [1, 20, ?, 64]> var_1539_cast_fp16 = matmul(transpose_x = var_1539_transpose_x_0, transpose_y = var_1539_transpose_y_0, x = var_1537_cast_fp16, y = v_65_cast_fp16)[name = string("op_1539_cast_fp16")];
+            tensor<int32, [4]> var_1540 = const()[name = string("op_1540"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_149x = const()[name = string("concat_149x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1541_cast_fp16 = transpose(perm = var_1540, x = var_1539_cast_fp16)[name = string("transpose_589")];
+            tensor<fp16, [1, ?, 1280]> x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1541_cast_fp16)[name = string("x_115_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423514304)))];
+            tensor<fp16, [1280]> var_1546_to_fp16 = const()[name = string("op_1546_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426791168)))];
+            tensor<fp16, [1, ?, 1280]> linear_51_cast_fp16 = linear(bias = var_1546_to_fp16, weight = var_1545_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")];
+            tensor<int32, [1]> var_1553_axes_0 = const()[name = string("op_1553_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426793792)))];
+            tensor<fp16, [1280]> blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426796416)))];
+            tensor<fp16, [1, ?, 1280]> var_1553_cast_fp16 = layer_norm(axes = var_1553_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1553_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1562_to_fp16 = const()[name = string("op_1562_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426799040)))];
+            tensor<fp16, [1280]> var_1563_to_fp16 = const()[name = string("op_1563_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430075904)))];
+            tensor<fp16, [1, ?, 1280]> linear_52_cast_fp16 = linear(bias = var_1563_to_fp16, weight = var_1562_to_fp16, x = var_1553_cast_fp16)[name = string("linear_52_cast_fp16")];
+            tensor<int32, [3]> concat_150 = const()[name = string("concat_150"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_151 = const()[name = string("concat_151"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_152 = const()[name = string("concat_152"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_153 = const()[name = string("concat_153"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_154x = const()[name = string("concat_154x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1583_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1583_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_55_cast_fp16 = mul(x = var_1583_cast_fp16, y = const_186_to_fp16)[name = string("q_55_cast_fp16")];
+            tensor<int32, [4]> var_1589 = const()[name = string("op_1589"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1590_cast_fp16 = reshape(shape = var_1589, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1590_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_69_cast_fp16 = mul(x = var_1590_cast_fp16, y = const_187_to_fp16)[name = string("k_69_cast_fp16")];
+            tensor<int32, [4]> var_1596 = const()[name = string("op_1596"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1597_cast_fp16 = reshape(shape = var_1596, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1597_cast_fp16")];
+            tensor<int32, [4]> var_1598 = const()[name = string("op_1598"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)];
+            bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_283_perm_0 = const()[name = string("transpose_283_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_284_perm_0 = const()[name = string("transpose_284_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_284 = transpose(perm = transpose_284_perm_0, x = k_69_cast_fp16)[name = string("transpose_586")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_283 = transpose(perm = transpose_283_perm_0, x = q_55_cast_fp16)[name = string("transpose_587")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_283, y = transpose_284)[name = string("qk_41_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_1602_cast_fp16 = softmax(axis = var_1446, x = qk_41_cast_fp16)[name = string("op_1602_cast_fp16")];
+            bool var_1604_transpose_x_0 = const()[name = string("op_1604_transpose_x_0"), val = bool(false)];
+            bool var_1604_transpose_y_0 = const()[name = string("op_1604_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_69_cast_fp16 = transpose(perm = var_1598, x = var_1597_cast_fp16)[name = string("transpose_588")];
+            tensor<fp16, [1, 20, ?, 64]> var_1604_cast_fp16 = matmul(transpose_x = var_1604_transpose_x_0, transpose_y = var_1604_transpose_y_0, x = var_1602_cast_fp16, y = v_69_cast_fp16)[name = string("op_1604_cast_fp16")];
+            tensor<int32, [4]> var_1605 = const()[name = string("op_1605"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_155x = const()[name = string("concat_155x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1606_cast_fp16 = transpose(perm = var_1605, x = var_1604_cast_fp16)[name = string("transpose_585")];
+            tensor<fp16, [1, ?, 1280]> x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1606_cast_fp16)[name = string("x_121_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430078528)))];
+            tensor<fp16, [1280]> var_1611_to_fp16 = const()[name = string("op_1611_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433355392)))];
+            tensor<fp16, [1, ?, 1280]> linear_53_cast_fp16 = linear(bias = var_1611_to_fp16, weight = var_1610_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")];
+            tensor<int32, [1]> var_1618_axes_0 = const()[name = string("op_1618_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433358016)))];
+            tensor<fp16, [1280]> blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433360640)))];
+            tensor<fp16, [1, ?, 1280]> var_1618_cast_fp16 = layer_norm(axes = var_1618_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1618_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1627_to_fp16 = const()[name = string("op_1627_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433363264)))];
+            tensor<fp16, [5120]> var_1628_to_fp16 = const()[name = string("op_1628_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446470528)))];
+            tensor<fp16, [1, ?, 5120]> linear_54_cast_fp16 = linear(bias = var_1628_to_fp16, weight = var_1627_to_fp16, x = var_1618_cast_fp16)[name = string("linear_54_cast_fp16")];
+            string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446480832)))];
+            tensor<fp16, [1280]> var_1634_to_fp16 = const()[name = string("op_1634_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459588096)))];
+            tensor<fp16, [1, ?, 1280]> linear_55_cast_fp16 = linear(bias = var_1634_to_fp16, weight = var_1633_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")];
+            tensor<int32, [4]> k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor<int32, [4]>([8, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_76)[name = string("k_cache_29_cast_fp16")];
+            tensor<int32, [4]> v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor<int32, [4]>([8, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_77)[name = string("v_cache_29_cast_fp16")];
+            tensor<int32, [4]> k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor<int32, [4]>([8, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")];
+            tensor<int32, [4]> v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor<int32, [4]>([8, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")];
+            int32 var_1657 = const()[name = string("op_1657"), val = int32(-1)];
+            tensor<int32, [1]> var_1675_axes_0 = const()[name = string("op_1675_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459590720)))];
+            tensor<fp16, [1280]> blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459593344)))];
+            fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1675_cast_fp16 = layer_norm(axes = var_1675_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1675_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459595968)))];
+            tensor<fp16, [1280]> var_1687_to_fp16 = const()[name = string("op_1687_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462872832)))];
+            tensor<fp16, [1, ?, 1280]> linear_56_cast_fp16 = linear(bias = var_1687_to_fp16, weight = var_1686_to_fp16, x = var_1675_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1690_to_fp16 = const()[name = string("op_1690_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462875456)))];
+            tensor<fp16, [1, ?, 1280]> linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1690_to_fp16, x = var_1675_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466152320)))];
+            tensor<fp16, [1280]> var_1695_to_fp16 = const()[name = string("op_1695_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469429184)))];
+            tensor<fp16, [1, ?, 1280]> linear_58_cast_fp16 = linear(bias = var_1695_to_fp16, weight = var_1694_to_fp16, x = var_1675_cast_fp16)[name = string("linear_58_cast_fp16")];
+            tensor<int32, [3]> var_1697_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1697_shape_cast_fp16")];
+            int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)];
+            int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)];
+            bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)];
+            string var_1697_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1697_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1697_shape_cast_fp16_to_uint16 = cast(dtype = var_1697_shape_cast_fp16_to_uint16_dtype_0, x = var_1697_shape_cast_fp16)[name = string("cast_376")];
+            uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1697_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")];
+            string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_375")];
+            int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")];
+            tensor<int32, [1]> expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")];
+            tensor<int32, [1]> concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor<int32, [1]>([7])];
+            int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)];
+            bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")];
+            tensor<int32, [1]> concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)];
+            bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_76)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_78_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_78 = read_state(input = k_cache1)[name = string("coreml_update_state_78")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_77)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_79_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_79 = read_state(input = v_cache1)[name = string("coreml_update_state_79")];
+            int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)];
+            int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(1280)];
+            int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)];
+            bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")];
+            tensor<int32, [3]> var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = concat_164, end_mask = var_1713_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1713_cast_fp16")];
+            tensor<int32, [3]> var_1716_begin_0 = const()[name = string("op_1716_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1716_end_mask_0 = const()[name = string("op_1716_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1716_cast_fp16 = slice_by_index(begin = var_1716_begin_0, end = concat_164, end_mask = var_1716_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1716_cast_fp16")];
+            tensor<int32, [4]> concat_166x = const()[name = string("concat_166x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1726_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1726_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_59_cast_fp16 = mul(x = var_1726_cast_fp16, y = const_188_to_fp16)[name = string("q_59_cast_fp16")];
+            tensor<int32, [4]> concat_167x = const()[name = string("concat_167x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1733_cast_fp16 = reshape(shape = concat_167x, x = var_1713_cast_fp16)[name = string("op_1733_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_75_cast_fp16 = mul(x = var_1733_cast_fp16, y = const_189_to_fp16)[name = string("k_75_cast_fp16")];
+            tensor<int32, [4]> concat_168x = const()[name = string("concat_168x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1740_cast_fp16 = reshape(shape = concat_168x, x = var_1716_cast_fp16)[name = string("op_1740_cast_fp16")];
+            tensor<int32, [4]> var_1741 = const()[name = string("op_1741"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)];
+            bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_285_perm_0 = const()[name = string("transpose_285_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_286_perm_0 = const()[name = string("transpose_286_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_286 = transpose(perm = transpose_286_perm_0, x = k_75_cast_fp16)[name = string("transpose_582")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_285 = transpose(perm = transpose_285_perm_0, x = q_59_cast_fp16)[name = string("transpose_583")];
+            tensor<fp16, [1, 20, ?, ?]> qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_285, y = transpose_286)[name = string("qk_43_cast_fp16")];
+            int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)];
+            int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)];
+            bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")];
+            tensor<int32, [2]> var_1744_begin_0 = const()[name = string("op_1744_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1744_end_mask_0 = const()[name = string("op_1744_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1744_cast_fp16 = slice_by_index(begin = var_1744_begin_0, end = concat_169, end_mask = var_1744_end_mask_0, x = mask_to_fp16)[name = string("op_1744_cast_fp16")];
+            int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)];
+            int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)];
+            bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")];
+            tensor<int32, [2]> var_1745_begin_0 = const()[name = string("op_1745_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1745_end_mask_0 = const()[name = string("op_1745_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1745_cast_fp16 = slice_by_index(begin = var_1745_begin_0, end = concat_170, end_mask = var_1745_end_mask_0, x = var_1744_cast_fp16)[name = string("op_1745_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1745_cast_fp16)[name = string("qk_45_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1748_cast_fp16 = softmax(axis = var_1657, x = qk_45_cast_fp16)[name = string("op_1748_cast_fp16")];
+            bool var_1750_transpose_x_0 = const()[name = string("op_1750_transpose_x_0"), val = bool(false)];
+            bool var_1750_transpose_y_0 = const()[name = string("op_1750_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_75_cast_fp16 = transpose(perm = var_1741, x = var_1740_cast_fp16)[name = string("transpose_584")];
+            tensor<fp16, [1, 20, ?, 64]> var_1750_cast_fp16 = matmul(transpose_x = var_1750_transpose_x_0, transpose_y = var_1750_transpose_y_0, x = var_1748_cast_fp16, y = v_75_cast_fp16)[name = string("op_1750_cast_fp16")];
+            tensor<int32, [4]> var_1751 = const()[name = string("op_1751"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_171x = const()[name = string("concat_171x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1752_cast_fp16 = transpose(perm = var_1751, x = var_1750_cast_fp16)[name = string("transpose_581")];
+            tensor<fp16, [1, ?, 1280]> x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1752_cast_fp16)[name = string("x_133_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1756_to_fp16 = const()[name = string("op_1756_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469431808)))];
+            tensor<fp16, [1280]> var_1757_to_fp16 = const()[name = string("op_1757_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472708672)))];
+            tensor<fp16, [1, ?, 1280]> linear_59_cast_fp16 = linear(bias = var_1757_to_fp16, weight = var_1756_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")];
+            tensor<int32, [1]> var_1764_axes_0 = const()[name = string("op_1764_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472711296)))];
+            tensor<fp16, [1280]> blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472713920)))];
+            tensor<fp16, [1, ?, 1280]> var_1764_cast_fp16 = layer_norm(axes = var_1764_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1764_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1773_to_fp16 = const()[name = string("op_1773_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472716544)))];
+            tensor<fp16, [1280]> var_1774_to_fp16 = const()[name = string("op_1774_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475993408)))];
+            tensor<fp16, [1, ?, 1280]> linear_60_cast_fp16 = linear(bias = var_1774_to_fp16, weight = var_1773_to_fp16, x = var_1764_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<int32, [3]> concat_172 = const()[name = string("concat_172"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_173 = const()[name = string("concat_173"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_174 = const()[name = string("concat_174"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_175 = const()[name = string("concat_175"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_176x = const()[name = string("concat_176x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1794_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1794_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_63_cast_fp16 = mul(x = var_1794_cast_fp16, y = const_190_to_fp16)[name = string("q_63_cast_fp16")];
+            tensor<int32, [4]> var_1800 = const()[name = string("op_1800"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1801_cast_fp16 = reshape(shape = var_1800, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1801_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_79_cast_fp16 = mul(x = var_1801_cast_fp16, y = const_191_to_fp16)[name = string("k_79_cast_fp16")];
+            tensor<int32, [4]> var_1807 = const()[name = string("op_1807"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1808_cast_fp16 = reshape(shape = var_1807, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1808_cast_fp16")];
+            tensor<int32, [4]> var_1809 = const()[name = string("op_1809"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)];
+            bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_287_perm_0 = const()[name = string("transpose_287_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_288_perm_0 = const()[name = string("transpose_288_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_288 = transpose(perm = transpose_288_perm_0, x = k_79_cast_fp16)[name = string("transpose_578")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_287 = transpose(perm = transpose_287_perm_0, x = q_63_cast_fp16)[name = string("transpose_579")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_287, y = transpose_288)[name = string("qk_47_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_1813_cast_fp16 = softmax(axis = var_1657, x = qk_47_cast_fp16)[name = string("op_1813_cast_fp16")];
+            bool var_1815_transpose_x_0 = const()[name = string("op_1815_transpose_x_0"), val = bool(false)];
+            bool var_1815_transpose_y_0 = const()[name = string("op_1815_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_79_cast_fp16 = transpose(perm = var_1809, x = var_1808_cast_fp16)[name = string("transpose_580")];
+            tensor<fp16, [1, 20, ?, 64]> var_1815_cast_fp16 = matmul(transpose_x = var_1815_transpose_x_0, transpose_y = var_1815_transpose_y_0, x = var_1813_cast_fp16, y = v_79_cast_fp16)[name = string("op_1815_cast_fp16")];
+            tensor<int32, [4]> var_1816 = const()[name = string("op_1816"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_177x = const()[name = string("concat_177x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1817_cast_fp16 = transpose(perm = var_1816, x = var_1815_cast_fp16)[name = string("transpose_577")];
+            tensor<fp16, [1, ?, 1280]> x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1817_cast_fp16)[name = string("x_139_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1821_to_fp16 = const()[name = string("op_1821_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475996032)))];
+            tensor<fp16, [1280]> var_1822_to_fp16 = const()[name = string("op_1822_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479272896)))];
+            tensor<fp16, [1, ?, 1280]> linear_61_cast_fp16 = linear(bias = var_1822_to_fp16, weight = var_1821_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")];
+            tensor<int32, [1]> var_1829_axes_0 = const()[name = string("op_1829_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479275520)))];
+            tensor<fp16, [1280]> blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479278144)))];
+            tensor<fp16, [1, ?, 1280]> var_1829_cast_fp16 = layer_norm(axes = var_1829_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1829_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1838_to_fp16 = const()[name = string("op_1838_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479280768)))];
+            tensor<fp16, [5120]> var_1839_to_fp16 = const()[name = string("op_1839_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492388032)))];
+            tensor<fp16, [1, ?, 5120]> linear_62_cast_fp16 = linear(bias = var_1839_to_fp16, weight = var_1838_to_fp16, x = var_1829_cast_fp16)[name = string("linear_62_cast_fp16")];
+            string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1844_to_fp16 = const()[name = string("op_1844_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492398336)))];
+            tensor<fp16, [1280]> var_1845_to_fp16 = const()[name = string("op_1845_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505505600)))];
+            tensor<fp16, [1, ?, 1280]> linear_63_cast_fp16 = linear(bias = var_1845_to_fp16, weight = var_1844_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")];
+            tensor<int32, [4]> k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor<int32, [4]>([9, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_78)[name = string("k_cache_33_cast_fp16")];
+            tensor<int32, [4]> v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor<int32, [4]>([9, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_79)[name = string("v_cache_33_cast_fp16")];
+            tensor<int32, [4]> k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor<int32, [4]>([9, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")];
+            tensor<int32, [4]> v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor<int32, [4]>([9, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")];
+            int32 var_1868 = const()[name = string("op_1868"), val = int32(-1)];
+            tensor<int32, [1]> var_1886_axes_0 = const()[name = string("op_1886_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505508224)))];
+            tensor<fp16, [1280]> blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505510848)))];
+            fp16 var_1874_to_fp16 = const()[name = string("op_1874_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_1886_cast_fp16 = layer_norm(axes = var_1886_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1886_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1897_to_fp16 = const()[name = string("op_1897_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505513472)))];
+            tensor<fp16, [1280]> var_1898_to_fp16 = const()[name = string("op_1898_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508790336)))];
+            tensor<fp16, [1, ?, 1280]> linear_64_cast_fp16 = linear(bias = var_1898_to_fp16, weight = var_1897_to_fp16, x = var_1886_cast_fp16)[name = string("linear_64_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1901_to_fp16 = const()[name = string("op_1901_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508792960)))];
+            tensor<fp16, [1, ?, 1280]> linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1901_to_fp16, x = var_1886_cast_fp16)[name = string("linear_65_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512069824)))];
+            tensor<fp16, [1280]> var_1906_to_fp16 = const()[name = string("op_1906_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515346688)))];
+            tensor<fp16, [1, ?, 1280]> linear_66_cast_fp16 = linear(bias = var_1906_to_fp16, weight = var_1905_to_fp16, x = var_1886_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<int32, [3]> var_1908_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1908_shape_cast_fp16")];
+            int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)];
+            int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)];
+            bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)];
+            string var_1908_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1908_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1908_shape_cast_fp16_to_uint16 = cast(dtype = var_1908_shape_cast_fp16_to_uint16_dtype_0, x = var_1908_shape_cast_fp16)[name = string("cast_374")];
+            uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1908_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")];
+            string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_373")];
+            int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")];
+            tensor<int32, [1]> expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")];
+            tensor<int32, [1]> concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor<int32, [1]>([8])];
+            int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)];
+            bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")];
+            tensor<int32, [1]> concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)];
+            bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_78)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_80_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_80 = read_state(input = k_cache1)[name = string("coreml_update_state_80")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_79)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_81_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_81 = read_state(input = v_cache1)[name = string("coreml_update_state_81")];
+            int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)];
+            int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(1280)];
+            int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)];
+            bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")];
+            tensor<int32, [3]> var_1924_begin_0 = const()[name = string("op_1924_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1924_end_mask_0 = const()[name = string("op_1924_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = concat_186, end_mask = var_1924_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1924_cast_fp16")];
+            tensor<int32, [3]> var_1927_begin_0 = const()[name = string("op_1927_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1927_end_mask_0 = const()[name = string("op_1927_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_1927_cast_fp16 = slice_by_index(begin = var_1927_begin_0, end = concat_186, end_mask = var_1927_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1927_cast_fp16")];
+            tensor<int32, [4]> concat_188x = const()[name = string("concat_188x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1937_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1937_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_67_cast_fp16 = mul(x = var_1937_cast_fp16, y = const_192_to_fp16)[name = string("q_67_cast_fp16")];
+            tensor<int32, [4]> concat_189x = const()[name = string("concat_189x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1944_cast_fp16 = reshape(shape = concat_189x, x = var_1924_cast_fp16)[name = string("op_1944_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_85_cast_fp16 = mul(x = var_1944_cast_fp16, y = const_193_to_fp16)[name = string("k_85_cast_fp16")];
+            tensor<int32, [4]> concat_190x = const()[name = string("concat_190x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_1951_cast_fp16 = reshape(shape = concat_190x, x = var_1927_cast_fp16)[name = string("op_1951_cast_fp16")];
+            tensor<int32, [4]> var_1952 = const()[name = string("op_1952"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)];
+            bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_289_perm_0 = const()[name = string("transpose_289_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_290_perm_0 = const()[name = string("transpose_290_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_290 = transpose(perm = transpose_290_perm_0, x = k_85_cast_fp16)[name = string("transpose_574")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_289 = transpose(perm = transpose_289_perm_0, x = q_67_cast_fp16)[name = string("transpose_575")];
+            tensor<fp16, [1, 20, ?, ?]> qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_289, y = transpose_290)[name = string("qk_49_cast_fp16")];
+            int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)];
+            int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)];
+            bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")];
+            tensor<int32, [2]> var_1955_begin_0 = const()[name = string("op_1955_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1955_end_mask_0 = const()[name = string("op_1955_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1955_cast_fp16 = slice_by_index(begin = var_1955_begin_0, end = concat_191, end_mask = var_1955_end_mask_0, x = mask_to_fp16)[name = string("op_1955_cast_fp16")];
+            int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)];
+            int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)];
+            bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")];
+            tensor<int32, [2]> var_1956_begin_0 = const()[name = string("op_1956_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1956_end_mask_0 = const()[name = string("op_1956_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1956_cast_fp16 = slice_by_index(begin = var_1956_begin_0, end = concat_192, end_mask = var_1956_end_mask_0, x = var_1955_cast_fp16)[name = string("op_1956_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1956_cast_fp16)[name = string("qk_51_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_1959_cast_fp16 = softmax(axis = var_1868, x = qk_51_cast_fp16)[name = string("op_1959_cast_fp16")];
+            bool var_1961_transpose_x_0 = const()[name = string("op_1961_transpose_x_0"), val = bool(false)];
+            bool var_1961_transpose_y_0 = const()[name = string("op_1961_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_85_cast_fp16 = transpose(perm = var_1952, x = var_1951_cast_fp16)[name = string("transpose_576")];
+            tensor<fp16, [1, 20, ?, 64]> var_1961_cast_fp16 = matmul(transpose_x = var_1961_transpose_x_0, transpose_y = var_1961_transpose_y_0, x = var_1959_cast_fp16, y = v_85_cast_fp16)[name = string("op_1961_cast_fp16")];
+            tensor<int32, [4]> var_1962 = const()[name = string("op_1962"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_193x = const()[name = string("concat_193x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_1963_cast_fp16 = transpose(perm = var_1962, x = var_1961_cast_fp16)[name = string("transpose_573")];
+            tensor<fp16, [1, ?, 1280]> x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1963_cast_fp16)[name = string("x_151_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1967_to_fp16 = const()[name = string("op_1967_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515349312)))];
+            tensor<fp16, [1280]> var_1968_to_fp16 = const()[name = string("op_1968_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518626176)))];
+            tensor<fp16, [1, ?, 1280]> linear_67_cast_fp16 = linear(bias = var_1968_to_fp16, weight = var_1967_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")];
+            tensor<int32, [1]> var_1975_axes_0 = const()[name = string("op_1975_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518628800)))];
+            tensor<fp16, [1280]> blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518631424)))];
+            tensor<fp16, [1, ?, 1280]> var_1975_cast_fp16 = layer_norm(axes = var_1975_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1975_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1984_to_fp16 = const()[name = string("op_1984_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518634048)))];
+            tensor<fp16, [1280]> var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521910912)))];
+            tensor<fp16, [1, ?, 1280]> linear_68_cast_fp16 = linear(bias = var_1985_to_fp16, weight = var_1984_to_fp16, x = var_1975_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [3]> concat_194 = const()[name = string("concat_194"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_195 = const()[name = string("concat_195"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_196 = const()[name = string("concat_196"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_197 = const()[name = string("concat_197"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_198x = const()[name = string("concat_198x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2005_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_2005_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_71_cast_fp16 = mul(x = var_2005_cast_fp16, y = const_194_to_fp16)[name = string("q_71_cast_fp16")];
+            tensor<int32, [4]> var_2011 = const()[name = string("op_2011"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2012_cast_fp16 = reshape(shape = var_2011, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2012_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_89_cast_fp16 = mul(x = var_2012_cast_fp16, y = const_195_to_fp16)[name = string("k_89_cast_fp16")];
+            tensor<int32, [4]> var_2018 = const()[name = string("op_2018"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2019_cast_fp16 = reshape(shape = var_2018, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2019_cast_fp16")];
+            tensor<int32, [4]> var_2020 = const()[name = string("op_2020"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)];
+            bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_291_perm_0 = const()[name = string("transpose_291_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_292_perm_0 = const()[name = string("transpose_292_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_292 = transpose(perm = transpose_292_perm_0, x = k_89_cast_fp16)[name = string("transpose_570")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_291 = transpose(perm = transpose_291_perm_0, x = q_71_cast_fp16)[name = string("transpose_571")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_291, y = transpose_292)[name = string("qk_53_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2024_cast_fp16 = softmax(axis = var_1868, x = qk_53_cast_fp16)[name = string("op_2024_cast_fp16")];
+            bool var_2026_transpose_x_0 = const()[name = string("op_2026_transpose_x_0"), val = bool(false)];
+            bool var_2026_transpose_y_0 = const()[name = string("op_2026_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_89_cast_fp16 = transpose(perm = var_2020, x = var_2019_cast_fp16)[name = string("transpose_572")];
+            tensor<fp16, [1, 20, ?, 64]> var_2026_cast_fp16 = matmul(transpose_x = var_2026_transpose_x_0, transpose_y = var_2026_transpose_y_0, x = var_2024_cast_fp16, y = v_89_cast_fp16)[name = string("op_2026_cast_fp16")];
+            tensor<int32, [4]> var_2027 = const()[name = string("op_2027"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_199x = const()[name = string("concat_199x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2028_cast_fp16 = transpose(perm = var_2027, x = var_2026_cast_fp16)[name = string("transpose_569")];
+            tensor<fp16, [1, ?, 1280]> x_157_cast_fp16 = reshape(shape = concat_199x, x = var_2028_cast_fp16)[name = string("x_157_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521913536)))];
+            tensor<fp16, [1280]> var_2033_to_fp16 = const()[name = string("op_2033_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525190400)))];
+            tensor<fp16, [1, ?, 1280]> linear_69_cast_fp16 = linear(bias = var_2033_to_fp16, weight = var_2032_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")];
+            tensor<int32, [1]> var_2040_axes_0 = const()[name = string("op_2040_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525193024)))];
+            tensor<fp16, [1280]> blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525195648)))];
+            tensor<fp16, [1, ?, 1280]> var_2040_cast_fp16 = layer_norm(axes = var_2040_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_2040_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2049_to_fp16 = const()[name = string("op_2049_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525198272)))];
+            tensor<fp16, [5120]> var_2050_to_fp16 = const()[name = string("op_2050_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538305536)))];
+            tensor<fp16, [1, ?, 5120]> linear_70_cast_fp16 = linear(bias = var_2050_to_fp16, weight = var_2049_to_fp16, x = var_2040_cast_fp16)[name = string("linear_70_cast_fp16")];
+            string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2055_to_fp16 = const()[name = string("op_2055_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538315840)))];
+            tensor<fp16, [1280]> var_2056_to_fp16 = const()[name = string("op_2056_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551423104)))];
+            tensor<fp16, [1, ?, 1280]> linear_71_cast_fp16 = linear(bias = var_2056_to_fp16, weight = var_2055_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")];
+            tensor<int32, [4]> k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor<int32, [4]>([10, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_80)[name = string("k_cache_37_cast_fp16")];
+            tensor<int32, [4]> v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor<int32, [4]>([10, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_81)[name = string("v_cache_37_cast_fp16")];
+            tensor<int32, [4]> k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor<int32, [4]>([10, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")];
+            tensor<int32, [4]> v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor<int32, [4]>([10, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")];
+            int32 var_2079 = const()[name = string("op_2079"), val = int32(-1)];
+            tensor<int32, [1]> var_2097_axes_0 = const()[name = string("op_2097_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551425728)))];
+            tensor<fp16, [1280]> blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551428352)))];
+            fp16 var_2085_to_fp16 = const()[name = string("op_2085_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2097_cast_fp16 = layer_norm(axes = var_2097_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2097_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2108_to_fp16 = const()[name = string("op_2108_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551430976)))];
+            tensor<fp16, [1280]> var_2109_to_fp16 = const()[name = string("op_2109_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554707840)))];
+            tensor<fp16, [1, ?, 1280]> linear_72_cast_fp16 = linear(bias = var_2109_to_fp16, weight = var_2108_to_fp16, x = var_2097_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2112_to_fp16 = const()[name = string("op_2112_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554710464)))];
+            tensor<fp16, [1, ?, 1280]> linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2112_to_fp16, x = var_2097_cast_fp16)[name = string("linear_73_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2116_to_fp16 = const()[name = string("op_2116_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557987328)))];
+            tensor<fp16, [1280]> var_2117_to_fp16 = const()[name = string("op_2117_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561264192)))];
+            tensor<fp16, [1, ?, 1280]> linear_74_cast_fp16 = linear(bias = var_2117_to_fp16, weight = var_2116_to_fp16, x = var_2097_cast_fp16)[name = string("linear_74_cast_fp16")];
+            tensor<int32, [3]> var_2119_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2119_shape_cast_fp16")];
+            int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)];
+            int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)];
+            bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)];
+            string var_2119_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2119_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2119_shape_cast_fp16_to_uint16 = cast(dtype = var_2119_shape_cast_fp16_to_uint16_dtype_0, x = var_2119_shape_cast_fp16)[name = string("cast_372")];
+            uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2119_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")];
+            string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_371")];
+            int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")];
+            tensor<int32, [1]> expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")];
+            tensor<int32, [1]> concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor<int32, [1]>([9])];
+            int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)];
+            bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")];
+            tensor<int32, [1]> concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)];
+            bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_80)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_82_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_82 = read_state(input = k_cache1)[name = string("coreml_update_state_82")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_81)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_83_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_83 = read_state(input = v_cache1)[name = string("coreml_update_state_83")];
+            int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)];
+            int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(1280)];
+            int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)];
+            bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")];
+            tensor<int32, [3]> var_2135_begin_0 = const()[name = string("op_2135_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2135_end_mask_0 = const()[name = string("op_2135_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2135_cast_fp16 = slice_by_index(begin = var_2135_begin_0, end = concat_208, end_mask = var_2135_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2135_cast_fp16")];
+            tensor<int32, [3]> var_2138_begin_0 = const()[name = string("op_2138_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2138_end_mask_0 = const()[name = string("op_2138_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = concat_208, end_mask = var_2138_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2138_cast_fp16")];
+            tensor<int32, [4]> concat_210x = const()[name = string("concat_210x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2148_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2148_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_75_cast_fp16 = mul(x = var_2148_cast_fp16, y = const_196_to_fp16)[name = string("q_75_cast_fp16")];
+            tensor<int32, [4]> concat_211x = const()[name = string("concat_211x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2155_cast_fp16 = reshape(shape = concat_211x, x = var_2135_cast_fp16)[name = string("op_2155_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_95_cast_fp16 = mul(x = var_2155_cast_fp16, y = const_197_to_fp16)[name = string("k_95_cast_fp16")];
+            tensor<int32, [4]> concat_212x = const()[name = string("concat_212x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2162_cast_fp16 = reshape(shape = concat_212x, x = var_2138_cast_fp16)[name = string("op_2162_cast_fp16")];
+            tensor<int32, [4]> var_2163 = const()[name = string("op_2163"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)];
+            bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_293_perm_0 = const()[name = string("transpose_293_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_294_perm_0 = const()[name = string("transpose_294_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_294 = transpose(perm = transpose_294_perm_0, x = k_95_cast_fp16)[name = string("transpose_566")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_293 = transpose(perm = transpose_293_perm_0, x = q_75_cast_fp16)[name = string("transpose_567")];
+            tensor<fp16, [1, 20, ?, ?]> qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_293, y = transpose_294)[name = string("qk_55_cast_fp16")];
+            int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)];
+            int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)];
+            bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")];
+            tensor<int32, [2]> var_2166_begin_0 = const()[name = string("op_2166_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2166_end_mask_0 = const()[name = string("op_2166_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2166_cast_fp16 = slice_by_index(begin = var_2166_begin_0, end = concat_213, end_mask = var_2166_end_mask_0, x = mask_to_fp16)[name = string("op_2166_cast_fp16")];
+            int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)];
+            int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)];
+            bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")];
+            tensor<int32, [2]> var_2167_begin_0 = const()[name = string("op_2167_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2167_end_mask_0 = const()[name = string("op_2167_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2167_cast_fp16 = slice_by_index(begin = var_2167_begin_0, end = concat_214, end_mask = var_2167_end_mask_0, x = var_2166_cast_fp16)[name = string("op_2167_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2167_cast_fp16)[name = string("qk_57_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_2170_cast_fp16 = softmax(axis = var_2079, x = qk_57_cast_fp16)[name = string("op_2170_cast_fp16")];
+            bool var_2172_transpose_x_0 = const()[name = string("op_2172_transpose_x_0"), val = bool(false)];
+            bool var_2172_transpose_y_0 = const()[name = string("op_2172_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_95_cast_fp16 = transpose(perm = var_2163, x = var_2162_cast_fp16)[name = string("transpose_568")];
+            tensor<fp16, [1, 20, ?, 64]> var_2172_cast_fp16 = matmul(transpose_x = var_2172_transpose_x_0, transpose_y = var_2172_transpose_y_0, x = var_2170_cast_fp16, y = v_95_cast_fp16)[name = string("op_2172_cast_fp16")];
+            tensor<int32, [4]> var_2173 = const()[name = string("op_2173"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_215x = const()[name = string("concat_215x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2174_cast_fp16 = transpose(perm = var_2173, x = var_2172_cast_fp16)[name = string("transpose_565")];
+            tensor<fp16, [1, ?, 1280]> x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2174_cast_fp16)[name = string("x_169_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2178_to_fp16 = const()[name = string("op_2178_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561266816)))];
+            tensor<fp16, [1280]> var_2179_to_fp16 = const()[name = string("op_2179_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564543680)))];
+            tensor<fp16, [1, ?, 1280]> linear_75_cast_fp16 = linear(bias = var_2179_to_fp16, weight = var_2178_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")];
+            tensor<int32, [1]> var_2186_axes_0 = const()[name = string("op_2186_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564546304)))];
+            tensor<fp16, [1280]> blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564548928)))];
+            tensor<fp16, [1, ?, 1280]> var_2186_cast_fp16 = layer_norm(axes = var_2186_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2186_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2195_to_fp16 = const()[name = string("op_2195_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564551552)))];
+            tensor<fp16, [1280]> var_2196_to_fp16 = const()[name = string("op_2196_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567828416)))];
+            tensor<fp16, [1, ?, 1280]> linear_76_cast_fp16 = linear(bias = var_2196_to_fp16, weight = var_2195_to_fp16, x = var_2186_cast_fp16)[name = string("linear_76_cast_fp16")];
+            tensor<int32, [3]> concat_216 = const()[name = string("concat_216"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_217 = const()[name = string("concat_217"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_218 = const()[name = string("concat_218"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_219 = const()[name = string("concat_219"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_220x = const()[name = string("concat_220x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2216_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2216_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_79_cast_fp16 = mul(x = var_2216_cast_fp16, y = const_198_to_fp16)[name = string("q_79_cast_fp16")];
+            tensor<int32, [4]> var_2222 = const()[name = string("op_2222"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2223_cast_fp16 = reshape(shape = var_2222, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2223_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_99_cast_fp16 = mul(x = var_2223_cast_fp16, y = const_199_to_fp16)[name = string("k_99_cast_fp16")];
+            tensor<int32, [4]> var_2229 = const()[name = string("op_2229"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2230_cast_fp16 = reshape(shape = var_2229, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2230_cast_fp16")];
+            tensor<int32, [4]> var_2231 = const()[name = string("op_2231"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)];
+            bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_295_perm_0 = const()[name = string("transpose_295_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_296_perm_0 = const()[name = string("transpose_296_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_296 = transpose(perm = transpose_296_perm_0, x = k_99_cast_fp16)[name = string("transpose_562")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_295 = transpose(perm = transpose_295_perm_0, x = q_79_cast_fp16)[name = string("transpose_563")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_295, y = transpose_296)[name = string("qk_59_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2235_cast_fp16 = softmax(axis = var_2079, x = qk_59_cast_fp16)[name = string("op_2235_cast_fp16")];
+            bool var_2237_transpose_x_0 = const()[name = string("op_2237_transpose_x_0"), val = bool(false)];
+            bool var_2237_transpose_y_0 = const()[name = string("op_2237_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_99_cast_fp16 = transpose(perm = var_2231, x = var_2230_cast_fp16)[name = string("transpose_564")];
+            tensor<fp16, [1, 20, ?, 64]> var_2237_cast_fp16 = matmul(transpose_x = var_2237_transpose_x_0, transpose_y = var_2237_transpose_y_0, x = var_2235_cast_fp16, y = v_99_cast_fp16)[name = string("op_2237_cast_fp16")];
+            tensor<int32, [4]> var_2238 = const()[name = string("op_2238"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_221x = const()[name = string("concat_221x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2239_cast_fp16 = transpose(perm = var_2238, x = var_2237_cast_fp16)[name = string("transpose_561")];
+            tensor<fp16, [1, ?, 1280]> x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2239_cast_fp16)[name = string("x_175_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2243_to_fp16 = const()[name = string("op_2243_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567831040)))];
+            tensor<fp16, [1280]> var_2244_to_fp16 = const()[name = string("op_2244_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571107904)))];
+            tensor<fp16, [1, ?, 1280]> linear_77_cast_fp16 = linear(bias = var_2244_to_fp16, weight = var_2243_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")];
+            tensor<int32, [1]> var_2251_axes_0 = const()[name = string("op_2251_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571110528)))];
+            tensor<fp16, [1280]> blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571113152)))];
+            tensor<fp16, [1, ?, 1280]> var_2251_cast_fp16 = layer_norm(axes = var_2251_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2251_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2260_to_fp16 = const()[name = string("op_2260_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571115776)))];
+            tensor<fp16, [5120]> var_2261_to_fp16 = const()[name = string("op_2261_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584223040)))];
+            tensor<fp16, [1, ?, 5120]> linear_78_cast_fp16 = linear(bias = var_2261_to_fp16, weight = var_2260_to_fp16, x = var_2251_cast_fp16)[name = string("linear_78_cast_fp16")];
+            string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2266_to_fp16 = const()[name = string("op_2266_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584233344)))];
+            tensor<fp16, [1280]> var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597340608)))];
+            tensor<fp16, [1, ?, 1280]> linear_79_cast_fp16 = linear(bias = var_2267_to_fp16, weight = var_2266_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")];
+            tensor<int32, [4]> k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor<int32, [4]>([11, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_82)[name = string("k_cache_41_cast_fp16")];
+            tensor<int32, [4]> v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor<int32, [4]>([11, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_83)[name = string("v_cache_41_cast_fp16")];
+            tensor<int32, [4]> k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor<int32, [4]>([11, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")];
+            tensor<int32, [4]> v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor<int32, [4]>([11, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")];
+            int32 var_2290 = const()[name = string("op_2290"), val = int32(-1)];
+            tensor<int32, [1]> var_2308_axes_0 = const()[name = string("op_2308_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597343232)))];
+            tensor<fp16, [1280]> blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597345856)))];
+            fp16 var_2296_to_fp16 = const()[name = string("op_2296_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2308_cast_fp16 = layer_norm(axes = var_2308_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2308_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2319_to_fp16 = const()[name = string("op_2319_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597348480)))];
+            tensor<fp16, [1280]> var_2320_to_fp16 = const()[name = string("op_2320_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600625344)))];
+            tensor<fp16, [1, ?, 1280]> linear_80_cast_fp16 = linear(bias = var_2320_to_fp16, weight = var_2319_to_fp16, x = var_2308_cast_fp16)[name = string("linear_80_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2323_to_fp16 = const()[name = string("op_2323_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600627968)))];
+            tensor<fp16, [1, ?, 1280]> linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2323_to_fp16, x = var_2308_cast_fp16)[name = string("linear_81_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2327_to_fp16 = const()[name = string("op_2327_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(603904832)))];
+            tensor<fp16, [1280]> var_2328_to_fp16 = const()[name = string("op_2328_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607181696)))];
+            tensor<fp16, [1, ?, 1280]> linear_82_cast_fp16 = linear(bias = var_2328_to_fp16, weight = var_2327_to_fp16, x = var_2308_cast_fp16)[name = string("linear_82_cast_fp16")];
+            tensor<int32, [3]> var_2330_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2330_shape_cast_fp16")];
+            int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)];
+            int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)];
+            bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)];
+            string var_2330_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2330_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2330_shape_cast_fp16_to_uint16 = cast(dtype = var_2330_shape_cast_fp16_to_uint16_dtype_0, x = var_2330_shape_cast_fp16)[name = string("cast_370")];
+            uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2330_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")];
+            string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_369")];
+            int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")];
+            tensor<int32, [1]> expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")];
+            tensor<int32, [1]> concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor<int32, [1]>([10])];
+            int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)];
+            bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")];
+            tensor<int32, [1]> concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)];
+            bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_82)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_84_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_84 = read_state(input = k_cache1)[name = string("coreml_update_state_84")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_83)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_85_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_85 = read_state(input = v_cache1)[name = string("coreml_update_state_85")];
+            int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)];
+            int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(1280)];
+            int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)];
+            bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")];
+            tensor<int32, [3]> var_2346_begin_0 = const()[name = string("op_2346_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2346_end_mask_0 = const()[name = string("op_2346_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = concat_230, end_mask = var_2346_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2346_cast_fp16")];
+            tensor<int32, [3]> var_2349_begin_0 = const()[name = string("op_2349_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2349_end_mask_0 = const()[name = string("op_2349_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2349_cast_fp16 = slice_by_index(begin = var_2349_begin_0, end = concat_230, end_mask = var_2349_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2349_cast_fp16")];
+            tensor<int32, [4]> concat_232x = const()[name = string("concat_232x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2359_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2359_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_83_cast_fp16 = mul(x = var_2359_cast_fp16, y = const_200_to_fp16)[name = string("q_83_cast_fp16")];
+            tensor<int32, [4]> concat_233x = const()[name = string("concat_233x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2366_cast_fp16 = reshape(shape = concat_233x, x = var_2346_cast_fp16)[name = string("op_2366_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_105_cast_fp16 = mul(x = var_2366_cast_fp16, y = const_201_to_fp16)[name = string("k_105_cast_fp16")];
+            tensor<int32, [4]> concat_234x = const()[name = string("concat_234x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2373_cast_fp16 = reshape(shape = concat_234x, x = var_2349_cast_fp16)[name = string("op_2373_cast_fp16")];
+            tensor<int32, [4]> var_2374 = const()[name = string("op_2374"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)];
+            bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_297_perm_0 = const()[name = string("transpose_297_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_298_perm_0 = const()[name = string("transpose_298_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_298 = transpose(perm = transpose_298_perm_0, x = k_105_cast_fp16)[name = string("transpose_558")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_297 = transpose(perm = transpose_297_perm_0, x = q_83_cast_fp16)[name = string("transpose_559")];
+            tensor<fp16, [1, 20, ?, ?]> qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_297, y = transpose_298)[name = string("qk_61_cast_fp16")];
+            int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)];
+            int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)];
+            bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")];
+            tensor<int32, [2]> var_2377_begin_0 = const()[name = string("op_2377_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2377_end_mask_0 = const()[name = string("op_2377_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2377_cast_fp16 = slice_by_index(begin = var_2377_begin_0, end = concat_235, end_mask = var_2377_end_mask_0, x = mask_to_fp16)[name = string("op_2377_cast_fp16")];
+            int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)];
+            int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)];
+            bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")];
+            tensor<int32, [2]> var_2378_begin_0 = const()[name = string("op_2378_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2378_end_mask_0 = const()[name = string("op_2378_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = concat_236, end_mask = var_2378_end_mask_0, x = var_2377_cast_fp16)[name = string("op_2378_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2378_cast_fp16)[name = string("qk_63_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_2381_cast_fp16 = softmax(axis = var_2290, x = qk_63_cast_fp16)[name = string("op_2381_cast_fp16")];
+            bool var_2383_transpose_x_0 = const()[name = string("op_2383_transpose_x_0"), val = bool(false)];
+            bool var_2383_transpose_y_0 = const()[name = string("op_2383_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_105_cast_fp16 = transpose(perm = var_2374, x = var_2373_cast_fp16)[name = string("transpose_560")];
+            tensor<fp16, [1, 20, ?, 64]> var_2383_cast_fp16 = matmul(transpose_x = var_2383_transpose_x_0, transpose_y = var_2383_transpose_y_0, x = var_2381_cast_fp16, y = v_105_cast_fp16)[name = string("op_2383_cast_fp16")];
+            tensor<int32, [4]> var_2384 = const()[name = string("op_2384"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_237x = const()[name = string("concat_237x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2385_cast_fp16 = transpose(perm = var_2384, x = var_2383_cast_fp16)[name = string("transpose_557")];
+            tensor<fp16, [1, ?, 1280]> x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2385_cast_fp16)[name = string("x_187_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2389_to_fp16 = const()[name = string("op_2389_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607184320)))];
+            tensor<fp16, [1280]> var_2390_to_fp16 = const()[name = string("op_2390_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610461184)))];
+            tensor<fp16, [1, ?, 1280]> linear_83_cast_fp16 = linear(bias = var_2390_to_fp16, weight = var_2389_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")];
+            tensor<int32, [1]> var_2397_axes_0 = const()[name = string("op_2397_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610463808)))];
+            tensor<fp16, [1280]> blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610466432)))];
+            tensor<fp16, [1, ?, 1280]> var_2397_cast_fp16 = layer_norm(axes = var_2397_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2397_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2406_to_fp16 = const()[name = string("op_2406_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610469056)))];
+            tensor<fp16, [1280]> var_2407_to_fp16 = const()[name = string("op_2407_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613745920)))];
+            tensor<fp16, [1, ?, 1280]> linear_84_cast_fp16 = linear(bias = var_2407_to_fp16, weight = var_2406_to_fp16, x = var_2397_cast_fp16)[name = string("linear_84_cast_fp16")];
+            tensor<int32, [3]> concat_238 = const()[name = string("concat_238"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_239 = const()[name = string("concat_239"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_240 = const()[name = string("concat_240"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_241 = const()[name = string("concat_241"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_242x = const()[name = string("concat_242x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2427_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2427_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_87_cast_fp16 = mul(x = var_2427_cast_fp16, y = const_202_to_fp16)[name = string("q_87_cast_fp16")];
+            tensor<int32, [4]> var_2433 = const()[name = string("op_2433"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2434_cast_fp16 = reshape(shape = var_2433, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2434_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_109_cast_fp16 = mul(x = var_2434_cast_fp16, y = const_203_to_fp16)[name = string("k_109_cast_fp16")];
+            tensor<int32, [4]> var_2440 = const()[name = string("op_2440"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2441_cast_fp16 = reshape(shape = var_2440, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2441_cast_fp16")];
+            tensor<int32, [4]> var_2442 = const()[name = string("op_2442"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)];
+            bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_299_perm_0 = const()[name = string("transpose_299_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_300_perm_0 = const()[name = string("transpose_300_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_300 = transpose(perm = transpose_300_perm_0, x = k_109_cast_fp16)[name = string("transpose_554")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_299 = transpose(perm = transpose_299_perm_0, x = q_87_cast_fp16)[name = string("transpose_555")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_299, y = transpose_300)[name = string("qk_65_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2446_cast_fp16 = softmax(axis = var_2290, x = qk_65_cast_fp16)[name = string("op_2446_cast_fp16")];
+            bool var_2448_transpose_x_0 = const()[name = string("op_2448_transpose_x_0"), val = bool(false)];
+            bool var_2448_transpose_y_0 = const()[name = string("op_2448_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_109_cast_fp16 = transpose(perm = var_2442, x = var_2441_cast_fp16)[name = string("transpose_556")];
+            tensor<fp16, [1, 20, ?, 64]> var_2448_cast_fp16 = matmul(transpose_x = var_2448_transpose_x_0, transpose_y = var_2448_transpose_y_0, x = var_2446_cast_fp16, y = v_109_cast_fp16)[name = string("op_2448_cast_fp16")];
+            tensor<int32, [4]> var_2449 = const()[name = string("op_2449"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_243x = const()[name = string("concat_243x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2450_cast_fp16 = transpose(perm = var_2449, x = var_2448_cast_fp16)[name = string("transpose_553")];
+            tensor<fp16, [1, ?, 1280]> x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2450_cast_fp16)[name = string("x_193_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2454_to_fp16 = const()[name = string("op_2454_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613748544)))];
+            tensor<fp16, [1280]> var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617025408)))];
+            tensor<fp16, [1, ?, 1280]> linear_85_cast_fp16 = linear(bias = var_2455_to_fp16, weight = var_2454_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")];
+            tensor<int32, [1]> var_2462_axes_0 = const()[name = string("op_2462_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617028032)))];
+            tensor<fp16, [1280]> blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617030656)))];
+            tensor<fp16, [1, ?, 1280]> var_2462_cast_fp16 = layer_norm(axes = var_2462_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2462_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2471_to_fp16 = const()[name = string("op_2471_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617033280)))];
+            tensor<fp16, [5120]> var_2472_to_fp16 = const()[name = string("op_2472_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630140544)))];
+            tensor<fp16, [1, ?, 5120]> linear_86_cast_fp16 = linear(bias = var_2472_to_fp16, weight = var_2471_to_fp16, x = var_2462_cast_fp16)[name = string("linear_86_cast_fp16")];
+            string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630150848)))];
+            tensor<fp16, [1280]> var_2478_to_fp16 = const()[name = string("op_2478_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643258112)))];
+            tensor<fp16, [1, ?, 1280]> linear_87_cast_fp16 = linear(bias = var_2478_to_fp16, weight = var_2477_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")];
+            tensor<int32, [4]> k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor<int32, [4]>([12, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_84)[name = string("k_cache_45_cast_fp16")];
+            tensor<int32, [4]> v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor<int32, [4]>([12, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_85)[name = string("v_cache_45_cast_fp16")];
+            tensor<int32, [4]> k_cache_47_begin_0 = const()[name = string("k_cache_47_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_47_end_0 = const()[name = string("k_cache_47_end_0"), val = tensor<int32, [4]>([12, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_47_end_mask_0 = const()[name = string("k_cache_47_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_47_squeeze_mask_0 = const()[name = string("k_cache_47_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_47_cast_fp16 = slice_by_index(begin = k_cache_47_begin_0, end = k_cache_47_end_0, end_mask = k_cache_47_end_mask_0, squeeze_mask = k_cache_47_squeeze_mask_0, x = read_state_2)[name = string("k_cache_47_cast_fp16")];
+            tensor<int32, [4]> v_cache_47_begin_0 = const()[name = string("v_cache_47_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_47_end_0 = const()[name = string("v_cache_47_end_0"), val = tensor<int32, [4]>([12, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_47_end_mask_0 = const()[name = string("v_cache_47_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_47_squeeze_mask_0 = const()[name = string("v_cache_47_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_47_cast_fp16 = slice_by_index(begin = v_cache_47_begin_0, end = v_cache_47_end_0, end_mask = v_cache_47_end_mask_0, squeeze_mask = v_cache_47_squeeze_mask_0, x = read_state_3)[name = string("v_cache_47_cast_fp16")];
+            int32 var_2501 = const()[name = string("op_2501"), val = int32(-1)];
+            tensor<int32, [1]> var_2519_axes_0 = const()[name = string("op_2519_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643260736)))];
+            tensor<fp16, [1280]> blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643263360)))];
+            fp16 var_2507_to_fp16 = const()[name = string("op_2507_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2519_cast_fp16 = layer_norm(axes = var_2519_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2519_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2530_to_fp16 = const()[name = string("op_2530_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643265984)))];
+            tensor<fp16, [1280]> var_2531_to_fp16 = const()[name = string("op_2531_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646542848)))];
+            tensor<fp16, [1, ?, 1280]> linear_88_cast_fp16 = linear(bias = var_2531_to_fp16, weight = var_2530_to_fp16, x = var_2519_cast_fp16)[name = string("linear_88_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2534_to_fp16 = const()[name = string("op_2534_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646545472)))];
+            tensor<fp16, [1, ?, 1280]> linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2534_to_fp16, x = var_2519_cast_fp16)[name = string("linear_89_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649822336)))];
+            tensor<fp16, [1280]> var_2539_to_fp16 = const()[name = string("op_2539_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653099200)))];
+            tensor<fp16, [1, ?, 1280]> linear_90_cast_fp16 = linear(bias = var_2539_to_fp16, weight = var_2538_to_fp16, x = var_2519_cast_fp16)[name = string("linear_90_cast_fp16")];
+            tensor<int32, [3]> var_2541_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2541_shape_cast_fp16")];
+            int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)];
+            int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)];
+            bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)];
+            string var_2541_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2541_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2541_shape_cast_fp16_to_uint16 = cast(dtype = var_2541_shape_cast_fp16_to_uint16_dtype_0, x = var_2541_shape_cast_fp16)[name = string("cast_368")];
+            uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2541_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")];
+            string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_367")];
+            int32 end_step_25 = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step_25")];
+            tensor<int32, [1]> expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step_25)[name = string("expand_dims_179")];
+            tensor<int32, [1]> concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor<int32, [1]>([11])];
+            int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)];
+            bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")];
+            tensor<int32, [1]> concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)];
+            bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_84)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_86_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_86 = read_state(input = k_cache1)[name = string("coreml_update_state_86")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_85)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_87_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_87 = read_state(input = v_cache1)[name = string("coreml_update_state_87")];
+            int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)];
+            int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(1280)];
+            int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)];
+            bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step_25, concat_252_values2_0))[name = string("concat_252")];
+            tensor<int32, [3]> var_2557_begin_0 = const()[name = string("op_2557_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2557_end_mask_0 = const()[name = string("op_2557_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = concat_252, end_mask = var_2557_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2557_cast_fp16")];
+            tensor<int32, [3]> var_2560_begin_0 = const()[name = string("op_2560_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2560_end_mask_0 = const()[name = string("op_2560_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2560_cast_fp16 = slice_by_index(begin = var_2560_begin_0, end = concat_252, end_mask = var_2560_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2560_cast_fp16")];
+            tensor<int32, [4]> concat_254x = const()[name = string("concat_254x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2570_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2570_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_91_cast_fp16 = mul(x = var_2570_cast_fp16, y = const_204_to_fp16)[name = string("q_91_cast_fp16")];
+            tensor<int32, [4]> concat_255x = const()[name = string("concat_255x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2577_cast_fp16 = reshape(shape = concat_255x, x = var_2557_cast_fp16)[name = string("op_2577_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_115_cast_fp16 = mul(x = var_2577_cast_fp16, y = const_205_to_fp16)[name = string("k_115_cast_fp16")];
+            tensor<int32, [4]> concat_256x = const()[name = string("concat_256x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2584_cast_fp16 = reshape(shape = concat_256x, x = var_2560_cast_fp16)[name = string("op_2584_cast_fp16")];
+            tensor<int32, [4]> var_2585 = const()[name = string("op_2585"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)];
+            bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_301_perm_0 = const()[name = string("transpose_301_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_302_perm_0 = const()[name = string("transpose_302_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_302 = transpose(perm = transpose_302_perm_0, x = k_115_cast_fp16)[name = string("transpose_550")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_301 = transpose(perm = transpose_301_perm_0, x = q_91_cast_fp16)[name = string("transpose_551")];
+            tensor<fp16, [1, 20, ?, ?]> qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_301, y = transpose_302)[name = string("qk_67_cast_fp16")];
+            int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)];
+            int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)];
+            bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")];
+            tensor<int32, [2]> var_2588_begin_0 = const()[name = string("op_2588_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2588_end_mask_0 = const()[name = string("op_2588_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2588_cast_fp16 = slice_by_index(begin = var_2588_begin_0, end = concat_257, end_mask = var_2588_end_mask_0, x = mask_to_fp16)[name = string("op_2588_cast_fp16")];
+            int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)];
+            int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)];
+            bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")];
+            tensor<int32, [2]> var_2589_begin_0 = const()[name = string("op_2589_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2589_end_mask_0 = const()[name = string("op_2589_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2589_cast_fp16 = slice_by_index(begin = var_2589_begin_0, end = concat_258, end_mask = var_2589_end_mask_0, x = var_2588_cast_fp16)[name = string("op_2589_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2589_cast_fp16)[name = string("qk_69_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_2592_cast_fp16 = softmax(axis = var_2501, x = qk_69_cast_fp16)[name = string("op_2592_cast_fp16")];
+            bool var_2594_transpose_x_0 = const()[name = string("op_2594_transpose_x_0"), val = bool(false)];
+            bool var_2594_transpose_y_0 = const()[name = string("op_2594_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_115_cast_fp16 = transpose(perm = var_2585, x = var_2584_cast_fp16)[name = string("transpose_552")];
+            tensor<fp16, [1, 20, ?, 64]> var_2594_cast_fp16 = matmul(transpose_x = var_2594_transpose_x_0, transpose_y = var_2594_transpose_y_0, x = var_2592_cast_fp16, y = v_115_cast_fp16)[name = string("op_2594_cast_fp16")];
+            tensor<int32, [4]> var_2595 = const()[name = string("op_2595"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_259x = const()[name = string("concat_259x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2596_cast_fp16 = transpose(perm = var_2595, x = var_2594_cast_fp16)[name = string("transpose_549")];
+            tensor<fp16, [1, ?, 1280]> x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2596_cast_fp16)[name = string("x_205_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2600_to_fp16 = const()[name = string("op_2600_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653101824)))];
+            tensor<fp16, [1280]> var_2601_to_fp16 = const()[name = string("op_2601_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656378688)))];
+            tensor<fp16, [1, ?, 1280]> linear_91_cast_fp16 = linear(bias = var_2601_to_fp16, weight = var_2600_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")];
+            tensor<int32, [1]> var_2608_axes_0 = const()[name = string("op_2608_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656381312)))];
+            tensor<fp16, [1280]> blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656383936)))];
+            tensor<fp16, [1, ?, 1280]> var_2608_cast_fp16 = layer_norm(axes = var_2608_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2608_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2617_to_fp16 = const()[name = string("op_2617_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656386560)))];
+            tensor<fp16, [1280]> var_2618_to_fp16 = const()[name = string("op_2618_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659663424)))];
+            tensor<fp16, [1, ?, 1280]> linear_92_cast_fp16 = linear(bias = var_2618_to_fp16, weight = var_2617_to_fp16, x = var_2608_cast_fp16)[name = string("linear_92_cast_fp16")];
+            tensor<int32, [3]> concat_260 = const()[name = string("concat_260"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_261 = const()[name = string("concat_261"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_262 = const()[name = string("concat_262"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_263 = const()[name = string("concat_263"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_264x = const()[name = string("concat_264x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2638_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2638_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_95_cast_fp16 = mul(x = var_2638_cast_fp16, y = const_206_to_fp16)[name = string("q_95_cast_fp16")];
+            tensor<int32, [4]> var_2644 = const()[name = string("op_2644"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2645_cast_fp16 = reshape(shape = var_2644, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2645_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_119_cast_fp16 = mul(x = var_2645_cast_fp16, y = const_207_to_fp16)[name = string("k_119_cast_fp16")];
+            tensor<int32, [4]> var_2651 = const()[name = string("op_2651"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2652_cast_fp16 = reshape(shape = var_2651, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2652_cast_fp16")];
+            tensor<int32, [4]> var_2653 = const()[name = string("op_2653"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_71_transpose_x_0 = const()[name = string("qk_71_transpose_x_0"), val = bool(false)];
+            bool qk_71_transpose_y_0 = const()[name = string("qk_71_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_303_perm_0 = const()[name = string("transpose_303_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_304_perm_0 = const()[name = string("transpose_304_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_304 = transpose(perm = transpose_304_perm_0, x = k_119_cast_fp16)[name = string("transpose_546")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_303 = transpose(perm = transpose_303_perm_0, x = q_95_cast_fp16)[name = string("transpose_547")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_71_cast_fp16 = matmul(transpose_x = qk_71_transpose_x_0, transpose_y = qk_71_transpose_y_0, x = transpose_303, y = transpose_304)[name = string("qk_71_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2657_cast_fp16 = softmax(axis = var_2501, x = qk_71_cast_fp16)[name = string("op_2657_cast_fp16")];
+            bool var_2659_transpose_x_0 = const()[name = string("op_2659_transpose_x_0"), val = bool(false)];
+            bool var_2659_transpose_y_0 = const()[name = string("op_2659_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_119_cast_fp16 = transpose(perm = var_2653, x = var_2652_cast_fp16)[name = string("transpose_548")];
+            tensor<fp16, [1, 20, ?, 64]> var_2659_cast_fp16 = matmul(transpose_x = var_2659_transpose_x_0, transpose_y = var_2659_transpose_y_0, x = var_2657_cast_fp16, y = v_119_cast_fp16)[name = string("op_2659_cast_fp16")];
+            tensor<int32, [4]> var_2660 = const()[name = string("op_2660"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_265x = const()[name = string("concat_265x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2661_cast_fp16 = transpose(perm = var_2660, x = var_2659_cast_fp16)[name = string("transpose_545")];
+            tensor<fp16, [1, ?, 1280]> x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2661_cast_fp16)[name = string("x_211_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2665_to_fp16 = const()[name = string("op_2665_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659666048)))];
+            tensor<fp16, [1280]> var_2666_to_fp16 = const()[name = string("op_2666_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662942912)))];
+            tensor<fp16, [1, ?, 1280]> linear_93_cast_fp16 = linear(bias = var_2666_to_fp16, weight = var_2665_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")];
+            tensor<int32, [1]> var_2673_axes_0 = const()[name = string("op_2673_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662945536)))];
+            tensor<fp16, [1280]> blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662948160)))];
+            tensor<fp16, [1, ?, 1280]> var_2673_cast_fp16 = layer_norm(axes = var_2673_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2673_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2682_to_fp16 = const()[name = string("op_2682_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662950784)))];
+            tensor<fp16, [5120]> var_2683_to_fp16 = const()[name = string("op_2683_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676058048)))];
+            tensor<fp16, [1, ?, 5120]> linear_94_cast_fp16 = linear(bias = var_2683_to_fp16, weight = var_2682_to_fp16, x = var_2673_cast_fp16)[name = string("linear_94_cast_fp16")];
+            string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2688_to_fp16 = const()[name = string("op_2688_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676068352)))];
+            tensor<fp16, [1280]> var_2689_to_fp16 = const()[name = string("op_2689_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689175616)))];
+            tensor<fp16, [1, ?, 1280]> linear_95_cast_fp16 = linear(bias = var_2689_to_fp16, weight = var_2688_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")];
+            tensor<int32, [4]> k_cache_49_begin_0 = const()[name = string("k_cache_49_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_49_end_0 = const()[name = string("k_cache_49_end_0"), val = tensor<int32, [4]>([13, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_49_end_mask_0 = const()[name = string("k_cache_49_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_49_squeeze_mask_0 = const()[name = string("k_cache_49_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_49_cast_fp16 = slice_by_index(begin = k_cache_49_begin_0, end = k_cache_49_end_0, end_mask = k_cache_49_end_mask_0, squeeze_mask = k_cache_49_squeeze_mask_0, x = coreml_update_state_86)[name = string("k_cache_49_cast_fp16")];
+            tensor<int32, [4]> v_cache_49_begin_0 = const()[name = string("v_cache_49_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_49_end_0 = const()[name = string("v_cache_49_end_0"), val = tensor<int32, [4]>([13, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_49_end_mask_0 = const()[name = string("v_cache_49_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_49_squeeze_mask_0 = const()[name = string("v_cache_49_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_49_cast_fp16 = slice_by_index(begin = v_cache_49_begin_0, end = v_cache_49_end_0, end_mask = v_cache_49_end_mask_0, squeeze_mask = v_cache_49_squeeze_mask_0, x = coreml_update_state_87)[name = string("v_cache_49_cast_fp16")];
+            tensor<int32, [4]> k_cache_51_begin_0 = const()[name = string("k_cache_51_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_51_end_0 = const()[name = string("k_cache_51_end_0"), val = tensor<int32, [4]>([13, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_51_end_mask_0 = const()[name = string("k_cache_51_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_51_squeeze_mask_0 = const()[name = string("k_cache_51_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_51_cast_fp16 = slice_by_index(begin = k_cache_51_begin_0, end = k_cache_51_end_0, end_mask = k_cache_51_end_mask_0, squeeze_mask = k_cache_51_squeeze_mask_0, x = read_state_2)[name = string("k_cache_51_cast_fp16")];
+            tensor<int32, [4]> v_cache_51_begin_0 = const()[name = string("v_cache_51_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_51_end_0 = const()[name = string("v_cache_51_end_0"), val = tensor<int32, [4]>([13, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_51_end_mask_0 = const()[name = string("v_cache_51_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_51_squeeze_mask_0 = const()[name = string("v_cache_51_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_51_cast_fp16 = slice_by_index(begin = v_cache_51_begin_0, end = v_cache_51_end_0, end_mask = v_cache_51_end_mask_0, squeeze_mask = v_cache_51_squeeze_mask_0, x = read_state_3)[name = string("v_cache_51_cast_fp16")];
+            int32 var_2712 = const()[name = string("op_2712"), val = int32(-1)];
+            tensor<int32, [1]> var_2730_axes_0 = const()[name = string("op_2730_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689178240)))];
+            tensor<fp16, [1280]> blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689180864)))];
+            fp16 var_2718_to_fp16 = const()[name = string("op_2718_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2730_cast_fp16 = layer_norm(axes = var_2730_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2730_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689183488)))];
+            tensor<fp16, [1280]> var_2742_to_fp16 = const()[name = string("op_2742_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692460352)))];
+            tensor<fp16, [1, ?, 1280]> linear_96_cast_fp16 = linear(bias = var_2742_to_fp16, weight = var_2741_to_fp16, x = var_2730_cast_fp16)[name = string("linear_96_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2745_to_fp16 = const()[name = string("op_2745_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692462976)))];
+            tensor<fp16, [1, ?, 1280]> linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2745_to_fp16, x = var_2730_cast_fp16)[name = string("linear_97_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2749_to_fp16 = const()[name = string("op_2749_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695739840)))];
+            tensor<fp16, [1280]> var_2750_to_fp16 = const()[name = string("op_2750_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699016704)))];
+            tensor<fp16, [1, ?, 1280]> linear_98_cast_fp16 = linear(bias = var_2750_to_fp16, weight = var_2749_to_fp16, x = var_2730_cast_fp16)[name = string("linear_98_cast_fp16")];
+            tensor<int32, [3]> var_2752_shape_cast_fp16 = shape(x = linear_96_cast_fp16)[name = string("op_2752_shape_cast_fp16")];
+            int32 gather_146_axis_0 = const()[name = string("gather_146_axis_0"), val = int32(0)];
+            int32 gather_146_batch_dims_0 = const()[name = string("gather_146_batch_dims_0"), val = int32(0)];
+            bool gather_146_validate_indices_0 = const()[name = string("gather_146_validate_indices_0"), val = bool(false)];
+            string var_2752_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2752_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_146_to_uint16 = const()[name = string("select_146_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2752_shape_cast_fp16_to_uint16 = cast(dtype = var_2752_shape_cast_fp16_to_uint16_dtype_0, x = var_2752_shape_cast_fp16)[name = string("cast_366")];
+            uint16 gather_146_cast_uint16 = gather(axis = gather_146_axis_0, batch_dims = gather_146_batch_dims_0, indices = select_146_to_uint16, validate_indices = gather_146_validate_indices_0, x = var_2752_shape_cast_fp16_to_uint16)[name = string("gather_146_cast_uint16")];
+            string gather_146_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_146_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_146_cast_uint16_to_int32 = cast(dtype = gather_146_cast_uint16_to_int32_dtype_0, x = gather_146_cast_uint16)[name = string("cast_365")];
+            int32 end_step_27 = add(x = offset, y = gather_146_cast_uint16_to_int32)[name = string("end_step_27")];
+            tensor<int32, [1]> expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = end_step_27)[name = string("expand_dims_195")];
+            tensor<int32, [1]> concat_268_values0_0 = const()[name = string("concat_268_values0_0"), val = tensor<int32, [1]>([12])];
+            int32 concat_268_axis_0 = const()[name = string("concat_268_axis_0"), val = int32(0)];
+            bool concat_268_interleave_0 = const()[name = string("concat_268_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_268 = concat(axis = concat_268_axis_0, interleave = concat_268_interleave_0, values = (concat_268_values0_0, expand_dims_192, expand_dims_1, expand_dims_194))[name = string("concat_268")];
+            tensor<int32, [1]> concat_269_values0_0 = const()[name = string("concat_269_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_269_values1_0 = const()[name = string("concat_269_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_269_values3_0 = const()[name = string("concat_269_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_269_axis_0 = const()[name = string("concat_269_axis_0"), val = int32(0)];
+            bool concat_269_interleave_0 = const()[name = string("concat_269_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_269 = concat(axis = concat_269_axis_0, interleave = concat_269_interleave_0, values = (concat_269_values0_0, concat_269_values1_0, expand_dims_195, concat_269_values3_0))[name = string("concat_269")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = k_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = k_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_13_stride_0, update = linear_97_cast_fp16, x = coreml_update_state_86)[name = string("k_cache1_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_13_cast_fp16, input = k_cache1)[name = string("coreml_update_state_88_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_88 = read_state(input = k_cache1)[name = string("coreml_update_state_88")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = v_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = v_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_13_stride_0, update = linear_98_cast_fp16, x = coreml_update_state_87)[name = string("v_cache1_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_13_cast_fp16, input = v_cache1)[name = string("coreml_update_state_89_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_89 = read_state(input = v_cache1)[name = string("coreml_update_state_89")];
+            int32 concat_274_values0_0 = const()[name = string("concat_274_values0_0"), val = int32(1)];
+            int32 concat_274_values2_0 = const()[name = string("concat_274_values2_0"), val = int32(1280)];
+            int32 concat_274_axis_0 = const()[name = string("concat_274_axis_0"), val = int32(0)];
+            bool concat_274_interleave_0 = const()[name = string("concat_274_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_274 = concat(axis = concat_274_axis_0, interleave = concat_274_interleave_0, values = (concat_274_values0_0, end_step_27, concat_274_values2_0))[name = string("concat_274")];
+            tensor<int32, [3]> var_2768_begin_0 = const()[name = string("op_2768_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2768_end_mask_0 = const()[name = string("op_2768_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2768_cast_fp16 = slice_by_index(begin = var_2768_begin_0, end = concat_274, end_mask = var_2768_end_mask_0, x = k_cache_49_cast_fp16)[name = string("op_2768_cast_fp16")];
+            tensor<int32, [3]> var_2771_begin_0 = const()[name = string("op_2771_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2771_end_mask_0 = const()[name = string("op_2771_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2771_cast_fp16 = slice_by_index(begin = var_2771_begin_0, end = concat_274, end_mask = var_2771_end_mask_0, x = v_cache_49_cast_fp16)[name = string("op_2771_cast_fp16")];
+            tensor<int32, [4]> concat_276x = const()[name = string("concat_276x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2781_cast_fp16 = reshape(shape = concat_276x, x = linear_96_cast_fp16)[name = string("op_2781_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_99_cast_fp16 = mul(x = var_2781_cast_fp16, y = const_208_to_fp16)[name = string("q_99_cast_fp16")];
+            tensor<int32, [4]> concat_277x = const()[name = string("concat_277x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2788_cast_fp16 = reshape(shape = concat_277x, x = var_2768_cast_fp16)[name = string("op_2788_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_125_cast_fp16 = mul(x = var_2788_cast_fp16, y = const_209_to_fp16)[name = string("k_125_cast_fp16")];
+            tensor<int32, [4]> concat_278x = const()[name = string("concat_278x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2795_cast_fp16 = reshape(shape = concat_278x, x = var_2771_cast_fp16)[name = string("op_2795_cast_fp16")];
+            tensor<int32, [4]> var_2796 = const()[name = string("op_2796"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_73_transpose_x_0 = const()[name = string("qk_73_transpose_x_0"), val = bool(false)];
+            bool qk_73_transpose_y_0 = const()[name = string("qk_73_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_305_perm_0 = const()[name = string("transpose_305_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_306_perm_0 = const()[name = string("transpose_306_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_306 = transpose(perm = transpose_306_perm_0, x = k_125_cast_fp16)[name = string("transpose_542")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_305 = transpose(perm = transpose_305_perm_0, x = q_99_cast_fp16)[name = string("transpose_543")];
+            tensor<fp16, [1, 20, ?, ?]> qk_73_cast_fp16 = matmul(transpose_x = qk_73_transpose_x_0, transpose_y = qk_73_transpose_y_0, x = transpose_305, y = transpose_306)[name = string("qk_73_cast_fp16")];
+            int32 concat_279_values1_0 = const()[name = string("concat_279_values1_0"), val = int32(448)];
+            int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)];
+            bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_146_cast_uint16_to_int32, concat_279_values1_0))[name = string("concat_279")];
+            tensor<int32, [2]> var_2799_begin_0 = const()[name = string("op_2799_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2799_end_mask_0 = const()[name = string("op_2799_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2799_cast_fp16 = slice_by_index(begin = var_2799_begin_0, end = concat_279, end_mask = var_2799_end_mask_0, x = mask_to_fp16)[name = string("op_2799_cast_fp16")];
+            int32 concat_280_values0_0 = const()[name = string("concat_280_values0_0"), val = int32(0)];
+            int32 concat_280_axis_0 = const()[name = string("concat_280_axis_0"), val = int32(0)];
+            bool concat_280_interleave_0 = const()[name = string("concat_280_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_280 = concat(axis = concat_280_axis_0, interleave = concat_280_interleave_0, values = (concat_280_values0_0, gather_146_cast_uint16_to_int32))[name = string("concat_280")];
+            tensor<int32, [2]> var_2800_begin_0 = const()[name = string("op_2800_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2800_end_mask_0 = const()[name = string("op_2800_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2800_cast_fp16 = slice_by_index(begin = var_2800_begin_0, end = concat_280, end_mask = var_2800_end_mask_0, x = var_2799_cast_fp16)[name = string("op_2800_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_75_cast_fp16 = add(x = qk_73_cast_fp16, y = var_2800_cast_fp16)[name = string("qk_75_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_2803_cast_fp16 = softmax(axis = var_2712, x = qk_75_cast_fp16)[name = string("op_2803_cast_fp16")];
+            bool var_2805_transpose_x_0 = const()[name = string("op_2805_transpose_x_0"), val = bool(false)];
+            bool var_2805_transpose_y_0 = const()[name = string("op_2805_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_125_cast_fp16 = transpose(perm = var_2796, x = var_2795_cast_fp16)[name = string("transpose_544")];
+            tensor<fp16, [1, 20, ?, 64]> var_2805_cast_fp16 = matmul(transpose_x = var_2805_transpose_x_0, transpose_y = var_2805_transpose_y_0, x = var_2803_cast_fp16, y = v_125_cast_fp16)[name = string("op_2805_cast_fp16")];
+            tensor<int32, [4]> var_2806 = const()[name = string("op_2806"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_281x = const()[name = string("concat_281x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2807_cast_fp16 = transpose(perm = var_2806, x = var_2805_cast_fp16)[name = string("transpose_541")];
+            tensor<fp16, [1, ?, 1280]> x_223_cast_fp16 = reshape(shape = concat_281x, x = var_2807_cast_fp16)[name = string("x_223_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2811_to_fp16 = const()[name = string("op_2811_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699019328)))];
+            tensor<fp16, [1280]> var_2812_to_fp16 = const()[name = string("op_2812_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702296192)))];
+            tensor<fp16, [1, ?, 1280]> linear_99_cast_fp16 = linear(bias = var_2812_to_fp16, weight = var_2811_to_fp16, x = x_223_cast_fp16)[name = string("linear_99_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_225_cast_fp16 = add(x = x_219_cast_fp16, y = linear_99_cast_fp16)[name = string("x_225_cast_fp16")];
+            tensor<int32, [1]> var_2819_axes_0 = const()[name = string("op_2819_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702298816)))];
+            tensor<fp16, [1280]> blocks_12_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702301440)))];
+            tensor<fp16, [1, ?, 1280]> var_2819_cast_fp16 = layer_norm(axes = var_2819_axes_0, beta = blocks_12_cross_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_cross_attn_ln_weight_to_fp16, x = x_225_cast_fp16)[name = string("op_2819_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2828_to_fp16 = const()[name = string("op_2828_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702304064)))];
+            tensor<fp16, [1280]> var_2829_to_fp16 = const()[name = string("op_2829_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705580928)))];
+            tensor<fp16, [1, ?, 1280]> linear_100_cast_fp16 = linear(bias = var_2829_to_fp16, weight = var_2828_to_fp16, x = var_2819_cast_fp16)[name = string("linear_100_cast_fp16")];
+            tensor<int32, [3]> concat_282 = const()[name = string("concat_282"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_283 = const()[name = string("concat_283"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_127_internal_tensor_assign_1_stride_0 = const()[name = string("k_127_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_282, begin_mask = k_127_internal_tensor_assign_1_begin_mask_0, end = concat_283, end_mask = k_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_127_internal_tensor_assign_1_squeeze_mask_0, stride = k_127_internal_tensor_assign_1_stride_0, update = k_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("k_127_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_284 = const()[name = string("concat_284"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_285 = const()[name = string("concat_285"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_127_internal_tensor_assign_1_stride_0 = const()[name = string("v_127_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_284, begin_mask = v_127_internal_tensor_assign_1_begin_mask_0, end = concat_285, end_mask = v_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_127_internal_tensor_assign_1_squeeze_mask_0, stride = v_127_internal_tensor_assign_1_stride_0, update = v_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("v_127_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_286x = const()[name = string("concat_286x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2849_cast_fp16 = reshape(shape = concat_286x, x = linear_100_cast_fp16)[name = string("op_2849_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_103_cast_fp16 = mul(x = var_2849_cast_fp16, y = const_210_to_fp16)[name = string("q_103_cast_fp16")];
+            tensor<int32, [4]> var_2855 = const()[name = string("op_2855"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2856_cast_fp16 = reshape(shape = var_2855, x = k_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2856_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_129_cast_fp16 = mul(x = var_2856_cast_fp16, y = const_211_to_fp16)[name = string("k_129_cast_fp16")];
+            tensor<int32, [4]> var_2862 = const()[name = string("op_2862"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2863_cast_fp16 = reshape(shape = var_2862, x = v_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2863_cast_fp16")];
+            tensor<int32, [4]> var_2864 = const()[name = string("op_2864"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_77_transpose_x_0 = const()[name = string("qk_77_transpose_x_0"), val = bool(false)];
+            bool qk_77_transpose_y_0 = const()[name = string("qk_77_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_307_perm_0 = const()[name = string("transpose_307_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_308_perm_0 = const()[name = string("transpose_308_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_308 = transpose(perm = transpose_308_perm_0, x = k_129_cast_fp16)[name = string("transpose_538")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_307 = transpose(perm = transpose_307_perm_0, x = q_103_cast_fp16)[name = string("transpose_539")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_77_cast_fp16 = matmul(transpose_x = qk_77_transpose_x_0, transpose_y = qk_77_transpose_y_0, x = transpose_307, y = transpose_308)[name = string("qk_77_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_2868_cast_fp16 = softmax(axis = var_2712, x = qk_77_cast_fp16)[name = string("op_2868_cast_fp16")];
+            bool var_2870_transpose_x_0 = const()[name = string("op_2870_transpose_x_0"), val = bool(false)];
+            bool var_2870_transpose_y_0 = const()[name = string("op_2870_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_129_cast_fp16 = transpose(perm = var_2864, x = var_2863_cast_fp16)[name = string("transpose_540")];
+            tensor<fp16, [1, 20, ?, 64]> var_2870_cast_fp16 = matmul(transpose_x = var_2870_transpose_x_0, transpose_y = var_2870_transpose_y_0, x = var_2868_cast_fp16, y = v_129_cast_fp16)[name = string("op_2870_cast_fp16")];
+            tensor<int32, [4]> var_2871 = const()[name = string("op_2871"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_287x = const()[name = string("concat_287x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_2872_cast_fp16 = transpose(perm = var_2871, x = var_2870_cast_fp16)[name = string("transpose_537")];
+            tensor<fp16, [1, ?, 1280]> x_229_cast_fp16 = reshape(shape = concat_287x, x = var_2872_cast_fp16)[name = string("x_229_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2876_to_fp16 = const()[name = string("op_2876_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705583552)))];
+            tensor<fp16, [1280]> var_2877_to_fp16 = const()[name = string("op_2877_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708860416)))];
+            tensor<fp16, [1, ?, 1280]> linear_101_cast_fp16 = linear(bias = var_2877_to_fp16, weight = var_2876_to_fp16, x = x_229_cast_fp16)[name = string("linear_101_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_231_cast_fp16 = add(x = x_225_cast_fp16, y = linear_101_cast_fp16)[name = string("x_231_cast_fp16")];
+            tensor<int32, [1]> var_2884_axes_0 = const()[name = string("op_2884_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708863040)))];
+            tensor<fp16, [1280]> blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708865664)))];
+            tensor<fp16, [1, ?, 1280]> var_2884_cast_fp16 = layer_norm(axes = var_2884_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_231_cast_fp16)[name = string("op_2884_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2893_to_fp16 = const()[name = string("op_2893_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708868288)))];
+            tensor<fp16, [5120]> var_2894_to_fp16 = const()[name = string("op_2894_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721975552)))];
+            tensor<fp16, [1, ?, 5120]> linear_102_cast_fp16 = linear(bias = var_2894_to_fp16, weight = var_2893_to_fp16, x = var_2884_cast_fp16)[name = string("linear_102_cast_fp16")];
+            string x_235_mode_0 = const()[name = string("x_235_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_235_cast_fp16 = gelu(mode = x_235_mode_0, x = linear_102_cast_fp16)[name = string("x_235_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2899_to_fp16 = const()[name = string("op_2899_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721985856)))];
+            tensor<fp16, [1280]> var_2900_to_fp16 = const()[name = string("op_2900_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735093120)))];
+            tensor<fp16, [1, ?, 1280]> linear_103_cast_fp16 = linear(bias = var_2900_to_fp16, weight = var_2899_to_fp16, x = x_235_cast_fp16)[name = string("linear_103_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_237_cast_fp16 = add(x = x_231_cast_fp16, y = linear_103_cast_fp16)[name = string("x_237_cast_fp16")];
+            tensor<int32, [4]> k_cache_53_begin_0 = const()[name = string("k_cache_53_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_53_end_0 = const()[name = string("k_cache_53_end_0"), val = tensor<int32, [4]>([14, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_53_end_mask_0 = const()[name = string("k_cache_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_53_squeeze_mask_0 = const()[name = string("k_cache_53_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_53_cast_fp16 = slice_by_index(begin = k_cache_53_begin_0, end = k_cache_53_end_0, end_mask = k_cache_53_end_mask_0, squeeze_mask = k_cache_53_squeeze_mask_0, x = coreml_update_state_88)[name = string("k_cache_53_cast_fp16")];
+            tensor<int32, [4]> v_cache_53_begin_0 = const()[name = string("v_cache_53_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_53_end_0 = const()[name = string("v_cache_53_end_0"), val = tensor<int32, [4]>([14, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_53_end_mask_0 = const()[name = string("v_cache_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_53_squeeze_mask_0 = const()[name = string("v_cache_53_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_53_cast_fp16 = slice_by_index(begin = v_cache_53_begin_0, end = v_cache_53_end_0, end_mask = v_cache_53_end_mask_0, squeeze_mask = v_cache_53_squeeze_mask_0, x = coreml_update_state_89)[name = string("v_cache_53_cast_fp16")];
+            tensor<int32, [4]> k_cache_55_begin_0 = const()[name = string("k_cache_55_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_55_end_0 = const()[name = string("k_cache_55_end_0"), val = tensor<int32, [4]>([14, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_55_end_mask_0 = const()[name = string("k_cache_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_55_squeeze_mask_0 = const()[name = string("k_cache_55_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_55_cast_fp16 = slice_by_index(begin = k_cache_55_begin_0, end = k_cache_55_end_0, end_mask = k_cache_55_end_mask_0, squeeze_mask = k_cache_55_squeeze_mask_0, x = read_state_2)[name = string("k_cache_55_cast_fp16")];
+            tensor<int32, [4]> v_cache_55_begin_0 = const()[name = string("v_cache_55_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_55_end_0 = const()[name = string("v_cache_55_end_0"), val = tensor<int32, [4]>([14, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_55_end_mask_0 = const()[name = string("v_cache_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_55_squeeze_mask_0 = const()[name = string("v_cache_55_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_55_cast_fp16 = slice_by_index(begin = v_cache_55_begin_0, end = v_cache_55_end_0, end_mask = v_cache_55_end_mask_0, squeeze_mask = v_cache_55_squeeze_mask_0, x = read_state_3)[name = string("v_cache_55_cast_fp16")];
+            int32 var_2923 = const()[name = string("op_2923"), val = int32(-1)];
+            tensor<int32, [1]> var_2941_axes_0 = const()[name = string("op_2941_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735095744)))];
+            tensor<fp16, [1280]> blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735098368)))];
+            fp16 var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_2941_cast_fp16 = layer_norm(axes = var_2941_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_237_cast_fp16)[name = string("op_2941_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735100992)))];
+            tensor<fp16, [1280]> var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738377856)))];
+            tensor<fp16, [1, ?, 1280]> linear_104_cast_fp16 = linear(bias = var_2953_to_fp16, weight = var_2952_to_fp16, x = var_2941_cast_fp16)[name = string("linear_104_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2956_to_fp16 = const()[name = string("op_2956_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738380480)))];
+            tensor<fp16, [1, ?, 1280]> linear_105_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2956_to_fp16, x = var_2941_cast_fp16)[name = string("linear_105_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2960_to_fp16 = const()[name = string("op_2960_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(741657344)))];
+            tensor<fp16, [1280]> var_2961_to_fp16 = const()[name = string("op_2961_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744934208)))];
+            tensor<fp16, [1, ?, 1280]> linear_106_cast_fp16 = linear(bias = var_2961_to_fp16, weight = var_2960_to_fp16, x = var_2941_cast_fp16)[name = string("linear_106_cast_fp16")];
+            tensor<int32, [3]> var_2963_shape_cast_fp16 = shape(x = linear_104_cast_fp16)[name = string("op_2963_shape_cast_fp16")];
+            int32 gather_158_axis_0 = const()[name = string("gather_158_axis_0"), val = int32(0)];
+            int32 gather_158_batch_dims_0 = const()[name = string("gather_158_batch_dims_0"), val = int32(0)];
+            bool gather_158_validate_indices_0 = const()[name = string("gather_158_validate_indices_0"), val = bool(false)];
+            string var_2963_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2963_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_158_to_uint16 = const()[name = string("select_158_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2963_shape_cast_fp16_to_uint16 = cast(dtype = var_2963_shape_cast_fp16_to_uint16_dtype_0, x = var_2963_shape_cast_fp16)[name = string("cast_364")];
+            uint16 gather_158_cast_uint16 = gather(axis = gather_158_axis_0, batch_dims = gather_158_batch_dims_0, indices = select_158_to_uint16, validate_indices = gather_158_validate_indices_0, x = var_2963_shape_cast_fp16_to_uint16)[name = string("gather_158_cast_uint16")];
+            string gather_158_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_158_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_158_cast_uint16_to_int32 = cast(dtype = gather_158_cast_uint16_to_int32_dtype_0, x = gather_158_cast_uint16)[name = string("cast_363")];
+            int32 end_step_29 = add(x = offset, y = gather_158_cast_uint16_to_int32)[name = string("end_step_29")];
+            tensor<int32, [1]> expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = end_step_29)[name = string("expand_dims_211")];
+            tensor<int32, [1]> concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor<int32, [1]>([13])];
+            int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)];
+            bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_208, expand_dims_1, expand_dims_210))[name = string("concat_290")];
+            tensor<int32, [1]> concat_291_values0_0 = const()[name = string("concat_291_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)];
+            bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (concat_291_values0_0, concat_291_values1_0, expand_dims_211, concat_291_values3_0))[name = string("concat_291")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = k_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = k_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_14_stride_0, update = linear_105_cast_fp16, x = coreml_update_state_88)[name = string("k_cache1_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_14_cast_fp16, input = k_cache1)[name = string("coreml_update_state_90_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_90 = read_state(input = k_cache1)[name = string("coreml_update_state_90")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = v_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = v_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_14_stride_0, update = linear_106_cast_fp16, x = coreml_update_state_89)[name = string("v_cache1_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_14_cast_fp16, input = v_cache1)[name = string("coreml_update_state_91_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_91 = read_state(input = v_cache1)[name = string("coreml_update_state_91")];
+            int32 concat_296_values0_0 = const()[name = string("concat_296_values0_0"), val = int32(1)];
+            int32 concat_296_values2_0 = const()[name = string("concat_296_values2_0"), val = int32(1280)];
+            int32 concat_296_axis_0 = const()[name = string("concat_296_axis_0"), val = int32(0)];
+            bool concat_296_interleave_0 = const()[name = string("concat_296_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_296 = concat(axis = concat_296_axis_0, interleave = concat_296_interleave_0, values = (concat_296_values0_0, end_step_29, concat_296_values2_0))[name = string("concat_296")];
+            tensor<int32, [3]> var_2979_begin_0 = const()[name = string("op_2979_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2979_end_mask_0 = const()[name = string("op_2979_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2979_cast_fp16 = slice_by_index(begin = var_2979_begin_0, end = concat_296, end_mask = var_2979_end_mask_0, x = k_cache_53_cast_fp16)[name = string("op_2979_cast_fp16")];
+            tensor<int32, [3]> var_2982_begin_0 = const()[name = string("op_2982_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2982_end_mask_0 = const()[name = string("op_2982_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_2982_cast_fp16 = slice_by_index(begin = var_2982_begin_0, end = concat_296, end_mask = var_2982_end_mask_0, x = v_cache_53_cast_fp16)[name = string("op_2982_cast_fp16")];
+            tensor<int32, [4]> concat_298x = const()[name = string("concat_298x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2992_cast_fp16 = reshape(shape = concat_298x, x = linear_104_cast_fp16)[name = string("op_2992_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_107_cast_fp16 = mul(x = var_2992_cast_fp16, y = const_212_to_fp16)[name = string("q_107_cast_fp16")];
+            tensor<int32, [4]> concat_299x = const()[name = string("concat_299x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_2999_cast_fp16 = reshape(shape = concat_299x, x = var_2979_cast_fp16)[name = string("op_2999_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_135_cast_fp16 = mul(x = var_2999_cast_fp16, y = const_213_to_fp16)[name = string("k_135_cast_fp16")];
+            tensor<int32, [4]> concat_300x = const()[name = string("concat_300x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3006_cast_fp16 = reshape(shape = concat_300x, x = var_2982_cast_fp16)[name = string("op_3006_cast_fp16")];
+            tensor<int32, [4]> var_3007 = const()[name = string("op_3007"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_79_transpose_x_0 = const()[name = string("qk_79_transpose_x_0"), val = bool(false)];
+            bool qk_79_transpose_y_0 = const()[name = string("qk_79_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_309_perm_0 = const()[name = string("transpose_309_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_310_perm_0 = const()[name = string("transpose_310_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_310 = transpose(perm = transpose_310_perm_0, x = k_135_cast_fp16)[name = string("transpose_534")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_309 = transpose(perm = transpose_309_perm_0, x = q_107_cast_fp16)[name = string("transpose_535")];
+            tensor<fp16, [1, 20, ?, ?]> qk_79_cast_fp16 = matmul(transpose_x = qk_79_transpose_x_0, transpose_y = qk_79_transpose_y_0, x = transpose_309, y = transpose_310)[name = string("qk_79_cast_fp16")];
+            int32 concat_301_values1_0 = const()[name = string("concat_301_values1_0"), val = int32(448)];
+            int32 concat_301_axis_0 = const()[name = string("concat_301_axis_0"), val = int32(0)];
+            bool concat_301_interleave_0 = const()[name = string("concat_301_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_301 = concat(axis = concat_301_axis_0, interleave = concat_301_interleave_0, values = (gather_158_cast_uint16_to_int32, concat_301_values1_0))[name = string("concat_301")];
+            tensor<int32, [2]> var_3010_begin_0 = const()[name = string("op_3010_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3010_end_mask_0 = const()[name = string("op_3010_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3010_cast_fp16 = slice_by_index(begin = var_3010_begin_0, end = concat_301, end_mask = var_3010_end_mask_0, x = mask_to_fp16)[name = string("op_3010_cast_fp16")];
+            int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(0)];
+            int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)];
+            bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, gather_158_cast_uint16_to_int32))[name = string("concat_302")];
+            tensor<int32, [2]> var_3011_begin_0 = const()[name = string("op_3011_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3011_end_mask_0 = const()[name = string("op_3011_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3011_cast_fp16 = slice_by_index(begin = var_3011_begin_0, end = concat_302, end_mask = var_3011_end_mask_0, x = var_3010_cast_fp16)[name = string("op_3011_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_81_cast_fp16 = add(x = qk_79_cast_fp16, y = var_3011_cast_fp16)[name = string("qk_81_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3014_cast_fp16 = softmax(axis = var_2923, x = qk_81_cast_fp16)[name = string("op_3014_cast_fp16")];
+            bool var_3016_transpose_x_0 = const()[name = string("op_3016_transpose_x_0"), val = bool(false)];
+            bool var_3016_transpose_y_0 = const()[name = string("op_3016_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_135_cast_fp16 = transpose(perm = var_3007, x = var_3006_cast_fp16)[name = string("transpose_536")];
+            tensor<fp16, [1, 20, ?, 64]> var_3016_cast_fp16 = matmul(transpose_x = var_3016_transpose_x_0, transpose_y = var_3016_transpose_y_0, x = var_3014_cast_fp16, y = v_135_cast_fp16)[name = string("op_3016_cast_fp16")];
+            tensor<int32, [4]> var_3017 = const()[name = string("op_3017"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_303x = const()[name = string("concat_303x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3018_cast_fp16 = transpose(perm = var_3017, x = var_3016_cast_fp16)[name = string("transpose_533")];
+            tensor<fp16, [1, ?, 1280]> x_241_cast_fp16 = reshape(shape = concat_303x, x = var_3018_cast_fp16)[name = string("x_241_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744936832)))];
+            tensor<fp16, [1280]> var_3023_to_fp16 = const()[name = string("op_3023_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748213696)))];
+            tensor<fp16, [1, ?, 1280]> linear_107_cast_fp16 = linear(bias = var_3023_to_fp16, weight = var_3022_to_fp16, x = x_241_cast_fp16)[name = string("linear_107_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_243_cast_fp16 = add(x = x_237_cast_fp16, y = linear_107_cast_fp16)[name = string("x_243_cast_fp16")];
+            tensor<int32, [1]> var_3030_axes_0 = const()[name = string("op_3030_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748216320)))];
+            tensor<fp16, [1280]> blocks_13_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748218944)))];
+            tensor<fp16, [1, ?, 1280]> var_3030_cast_fp16 = layer_norm(axes = var_3030_axes_0, beta = blocks_13_cross_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_cross_attn_ln_weight_to_fp16, x = x_243_cast_fp16)[name = string("op_3030_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3039_to_fp16 = const()[name = string("op_3039_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748221568)))];
+            tensor<fp16, [1280]> var_3040_to_fp16 = const()[name = string("op_3040_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751498432)))];
+            tensor<fp16, [1, ?, 1280]> linear_108_cast_fp16 = linear(bias = var_3040_to_fp16, weight = var_3039_to_fp16, x = var_3030_cast_fp16)[name = string("linear_108_cast_fp16")];
+            tensor<int32, [3]> concat_304 = const()[name = string("concat_304"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_305 = const()[name = string("concat_305"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_137_internal_tensor_assign_1_stride_0 = const()[name = string("k_137_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_304, begin_mask = k_137_internal_tensor_assign_1_begin_mask_0, end = concat_305, end_mask = k_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_137_internal_tensor_assign_1_squeeze_mask_0, stride = k_137_internal_tensor_assign_1_stride_0, update = k_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("k_137_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_306 = const()[name = string("concat_306"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_307 = const()[name = string("concat_307"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_137_internal_tensor_assign_1_stride_0 = const()[name = string("v_137_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_306, begin_mask = v_137_internal_tensor_assign_1_begin_mask_0, end = concat_307, end_mask = v_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_137_internal_tensor_assign_1_squeeze_mask_0, stride = v_137_internal_tensor_assign_1_stride_0, update = v_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("v_137_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_308x = const()[name = string("concat_308x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3060_cast_fp16 = reshape(shape = concat_308x, x = linear_108_cast_fp16)[name = string("op_3060_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_111_cast_fp16 = mul(x = var_3060_cast_fp16, y = const_214_to_fp16)[name = string("q_111_cast_fp16")];
+            tensor<int32, [4]> var_3066 = const()[name = string("op_3066"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3067_cast_fp16 = reshape(shape = var_3066, x = k_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3067_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_139_cast_fp16 = mul(x = var_3067_cast_fp16, y = const_215_to_fp16)[name = string("k_139_cast_fp16")];
+            tensor<int32, [4]> var_3073 = const()[name = string("op_3073"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3074_cast_fp16 = reshape(shape = var_3073, x = v_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3074_cast_fp16")];
+            tensor<int32, [4]> var_3075 = const()[name = string("op_3075"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_83_transpose_x_0 = const()[name = string("qk_83_transpose_x_0"), val = bool(false)];
+            bool qk_83_transpose_y_0 = const()[name = string("qk_83_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_311_perm_0 = const()[name = string("transpose_311_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_312_perm_0 = const()[name = string("transpose_312_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_312 = transpose(perm = transpose_312_perm_0, x = k_139_cast_fp16)[name = string("transpose_530")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_311 = transpose(perm = transpose_311_perm_0, x = q_111_cast_fp16)[name = string("transpose_531")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_83_cast_fp16 = matmul(transpose_x = qk_83_transpose_x_0, transpose_y = qk_83_transpose_y_0, x = transpose_311, y = transpose_312)[name = string("qk_83_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3079_cast_fp16 = softmax(axis = var_2923, x = qk_83_cast_fp16)[name = string("op_3079_cast_fp16")];
+            bool var_3081_transpose_x_0 = const()[name = string("op_3081_transpose_x_0"), val = bool(false)];
+            bool var_3081_transpose_y_0 = const()[name = string("op_3081_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_139_cast_fp16 = transpose(perm = var_3075, x = var_3074_cast_fp16)[name = string("transpose_532")];
+            tensor<fp16, [1, 20, ?, 64]> var_3081_cast_fp16 = matmul(transpose_x = var_3081_transpose_x_0, transpose_y = var_3081_transpose_y_0, x = var_3079_cast_fp16, y = v_139_cast_fp16)[name = string("op_3081_cast_fp16")];
+            tensor<int32, [4]> var_3082 = const()[name = string("op_3082"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_309x = const()[name = string("concat_309x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3083_cast_fp16 = transpose(perm = var_3082, x = var_3081_cast_fp16)[name = string("transpose_529")];
+            tensor<fp16, [1, ?, 1280]> x_247_cast_fp16 = reshape(shape = concat_309x, x = var_3083_cast_fp16)[name = string("x_247_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3087_to_fp16 = const()[name = string("op_3087_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751501056)))];
+            tensor<fp16, [1280]> var_3088_to_fp16 = const()[name = string("op_3088_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754777920)))];
+            tensor<fp16, [1, ?, 1280]> linear_109_cast_fp16 = linear(bias = var_3088_to_fp16, weight = var_3087_to_fp16, x = x_247_cast_fp16)[name = string("linear_109_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_249_cast_fp16 = add(x = x_243_cast_fp16, y = linear_109_cast_fp16)[name = string("x_249_cast_fp16")];
+            tensor<int32, [1]> var_3095_axes_0 = const()[name = string("op_3095_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754780544)))];
+            tensor<fp16, [1280]> blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754783168)))];
+            tensor<fp16, [1, ?, 1280]> var_3095_cast_fp16 = layer_norm(axes = var_3095_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_249_cast_fp16)[name = string("op_3095_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3104_to_fp16 = const()[name = string("op_3104_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754785792)))];
+            tensor<fp16, [5120]> var_3105_to_fp16 = const()[name = string("op_3105_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767893056)))];
+            tensor<fp16, [1, ?, 5120]> linear_110_cast_fp16 = linear(bias = var_3105_to_fp16, weight = var_3104_to_fp16, x = var_3095_cast_fp16)[name = string("linear_110_cast_fp16")];
+            string x_253_mode_0 = const()[name = string("x_253_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_253_cast_fp16 = gelu(mode = x_253_mode_0, x = linear_110_cast_fp16)[name = string("x_253_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3110_to_fp16 = const()[name = string("op_3110_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767903360)))];
+            tensor<fp16, [1280]> var_3111_to_fp16 = const()[name = string("op_3111_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781010624)))];
+            tensor<fp16, [1, ?, 1280]> linear_111_cast_fp16 = linear(bias = var_3111_to_fp16, weight = var_3110_to_fp16, x = x_253_cast_fp16)[name = string("linear_111_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_255_cast_fp16 = add(x = x_249_cast_fp16, y = linear_111_cast_fp16)[name = string("x_255_cast_fp16")];
+            tensor<int32, [4]> k_cache_57_begin_0 = const()[name = string("k_cache_57_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_57_end_0 = const()[name = string("k_cache_57_end_0"), val = tensor<int32, [4]>([15, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_57_end_mask_0 = const()[name = string("k_cache_57_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_57_squeeze_mask_0 = const()[name = string("k_cache_57_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_57_cast_fp16 = slice_by_index(begin = k_cache_57_begin_0, end = k_cache_57_end_0, end_mask = k_cache_57_end_mask_0, squeeze_mask = k_cache_57_squeeze_mask_0, x = coreml_update_state_90)[name = string("k_cache_57_cast_fp16")];
+            tensor<int32, [4]> v_cache_57_begin_0 = const()[name = string("v_cache_57_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_57_end_0 = const()[name = string("v_cache_57_end_0"), val = tensor<int32, [4]>([15, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_57_end_mask_0 = const()[name = string("v_cache_57_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_57_squeeze_mask_0 = const()[name = string("v_cache_57_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_57_cast_fp16 = slice_by_index(begin = v_cache_57_begin_0, end = v_cache_57_end_0, end_mask = v_cache_57_end_mask_0, squeeze_mask = v_cache_57_squeeze_mask_0, x = coreml_update_state_91)[name = string("v_cache_57_cast_fp16")];
+            tensor<int32, [4]> k_cache_59_begin_0 = const()[name = string("k_cache_59_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_59_end_0 = const()[name = string("k_cache_59_end_0"), val = tensor<int32, [4]>([15, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_59_end_mask_0 = const()[name = string("k_cache_59_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_59_squeeze_mask_0 = const()[name = string("k_cache_59_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_59_cast_fp16 = slice_by_index(begin = k_cache_59_begin_0, end = k_cache_59_end_0, end_mask = k_cache_59_end_mask_0, squeeze_mask = k_cache_59_squeeze_mask_0, x = read_state_2)[name = string("k_cache_59_cast_fp16")];
+            tensor<int32, [4]> v_cache_59_begin_0 = const()[name = string("v_cache_59_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_59_end_0 = const()[name = string("v_cache_59_end_0"), val = tensor<int32, [4]>([15, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_59_end_mask_0 = const()[name = string("v_cache_59_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_59_squeeze_mask_0 = const()[name = string("v_cache_59_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_59_cast_fp16 = slice_by_index(begin = v_cache_59_begin_0, end = v_cache_59_end_0, end_mask = v_cache_59_end_mask_0, squeeze_mask = v_cache_59_squeeze_mask_0, x = read_state_3)[name = string("v_cache_59_cast_fp16")];
+            int32 var_3134 = const()[name = string("op_3134"), val = int32(-1)];
+            tensor<int32, [1]> var_3152_axes_0 = const()[name = string("op_3152_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781013248)))];
+            tensor<fp16, [1280]> blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781015872)))];
+            fp16 var_3140_to_fp16 = const()[name = string("op_3140_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3152_cast_fp16 = layer_norm(axes = var_3152_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_255_cast_fp16)[name = string("op_3152_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3163_to_fp16 = const()[name = string("op_3163_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781018496)))];
+            tensor<fp16, [1280]> var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784295360)))];
+            tensor<fp16, [1, ?, 1280]> linear_112_cast_fp16 = linear(bias = var_3164_to_fp16, weight = var_3163_to_fp16, x = var_3152_cast_fp16)[name = string("linear_112_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3167_to_fp16 = const()[name = string("op_3167_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784297984)))];
+            tensor<fp16, [1, ?, 1280]> linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3167_to_fp16, x = var_3152_cast_fp16)[name = string("linear_113_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3171_to_fp16 = const()[name = string("op_3171_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787574848)))];
+            tensor<fp16, [1280]> var_3172_to_fp16 = const()[name = string("op_3172_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790851712)))];
+            tensor<fp16, [1, ?, 1280]> linear_114_cast_fp16 = linear(bias = var_3172_to_fp16, weight = var_3171_to_fp16, x = var_3152_cast_fp16)[name = string("linear_114_cast_fp16")];
+            tensor<int32, [3]> var_3174_shape_cast_fp16 = shape(x = linear_112_cast_fp16)[name = string("op_3174_shape_cast_fp16")];
+            int32 gather_170_axis_0 = const()[name = string("gather_170_axis_0"), val = int32(0)];
+            int32 gather_170_batch_dims_0 = const()[name = string("gather_170_batch_dims_0"), val = int32(0)];
+            bool gather_170_validate_indices_0 = const()[name = string("gather_170_validate_indices_0"), val = bool(false)];
+            string var_3174_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3174_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_170_to_uint16 = const()[name = string("select_170_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3174_shape_cast_fp16_to_uint16 = cast(dtype = var_3174_shape_cast_fp16_to_uint16_dtype_0, x = var_3174_shape_cast_fp16)[name = string("cast_362")];
+            uint16 gather_170_cast_uint16 = gather(axis = gather_170_axis_0, batch_dims = gather_170_batch_dims_0, indices = select_170_to_uint16, validate_indices = gather_170_validate_indices_0, x = var_3174_shape_cast_fp16_to_uint16)[name = string("gather_170_cast_uint16")];
+            string gather_170_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_170_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_170_cast_uint16_to_int32 = cast(dtype = gather_170_cast_uint16_to_int32_dtype_0, x = gather_170_cast_uint16)[name = string("cast_361")];
+            int32 end_step_31 = add(x = offset, y = gather_170_cast_uint16_to_int32)[name = string("end_step_31")];
+            tensor<int32, [1]> expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = end_step_31)[name = string("expand_dims_227")];
+            tensor<int32, [1]> concat_312_values0_0 = const()[name = string("concat_312_values0_0"), val = tensor<int32, [1]>([14])];
+            int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)];
+            bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (concat_312_values0_0, expand_dims_224, expand_dims_1, expand_dims_226))[name = string("concat_312")];
+            tensor<int32, [1]> concat_313_values0_0 = const()[name = string("concat_313_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)];
+            bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (concat_313_values0_0, concat_313_values1_0, expand_dims_227, concat_313_values3_0))[name = string("concat_313")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = k_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = k_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_15_stride_0, update = linear_113_cast_fp16, x = coreml_update_state_90)[name = string("k_cache1_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_15_cast_fp16, input = k_cache1)[name = string("coreml_update_state_92_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_92 = read_state(input = k_cache1)[name = string("coreml_update_state_92")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = v_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = v_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_15_stride_0, update = linear_114_cast_fp16, x = coreml_update_state_91)[name = string("v_cache1_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_15_cast_fp16, input = v_cache1)[name = string("coreml_update_state_93_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_93 = read_state(input = v_cache1)[name = string("coreml_update_state_93")];
+            int32 concat_318_values0_0 = const()[name = string("concat_318_values0_0"), val = int32(1)];
+            int32 concat_318_values2_0 = const()[name = string("concat_318_values2_0"), val = int32(1280)];
+            int32 concat_318_axis_0 = const()[name = string("concat_318_axis_0"), val = int32(0)];
+            bool concat_318_interleave_0 = const()[name = string("concat_318_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_318 = concat(axis = concat_318_axis_0, interleave = concat_318_interleave_0, values = (concat_318_values0_0, end_step_31, concat_318_values2_0))[name = string("concat_318")];
+            tensor<int32, [3]> var_3190_begin_0 = const()[name = string("op_3190_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3190_end_mask_0 = const()[name = string("op_3190_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3190_cast_fp16 = slice_by_index(begin = var_3190_begin_0, end = concat_318, end_mask = var_3190_end_mask_0, x = k_cache_57_cast_fp16)[name = string("op_3190_cast_fp16")];
+            tensor<int32, [3]> var_3193_begin_0 = const()[name = string("op_3193_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3193_end_mask_0 = const()[name = string("op_3193_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3193_cast_fp16 = slice_by_index(begin = var_3193_begin_0, end = concat_318, end_mask = var_3193_end_mask_0, x = v_cache_57_cast_fp16)[name = string("op_3193_cast_fp16")];
+            tensor<int32, [4]> concat_320x = const()[name = string("concat_320x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3203_cast_fp16 = reshape(shape = concat_320x, x = linear_112_cast_fp16)[name = string("op_3203_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_216_to_fp16 = const()[name = string("const_216_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_115_cast_fp16 = mul(x = var_3203_cast_fp16, y = const_216_to_fp16)[name = string("q_115_cast_fp16")];
+            tensor<int32, [4]> concat_321x = const()[name = string("concat_321x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3210_cast_fp16 = reshape(shape = concat_321x, x = var_3190_cast_fp16)[name = string("op_3210_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_217_to_fp16 = const()[name = string("const_217_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_145_cast_fp16 = mul(x = var_3210_cast_fp16, y = const_217_to_fp16)[name = string("k_145_cast_fp16")];
+            tensor<int32, [4]> concat_322x = const()[name = string("concat_322x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3217_cast_fp16 = reshape(shape = concat_322x, x = var_3193_cast_fp16)[name = string("op_3217_cast_fp16")];
+            tensor<int32, [4]> var_3218 = const()[name = string("op_3218"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_85_transpose_x_0 = const()[name = string("qk_85_transpose_x_0"), val = bool(false)];
+            bool qk_85_transpose_y_0 = const()[name = string("qk_85_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_313_perm_0 = const()[name = string("transpose_313_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_314_perm_0 = const()[name = string("transpose_314_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_314 = transpose(perm = transpose_314_perm_0, x = k_145_cast_fp16)[name = string("transpose_526")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_313 = transpose(perm = transpose_313_perm_0, x = q_115_cast_fp16)[name = string("transpose_527")];
+            tensor<fp16, [1, 20, ?, ?]> qk_85_cast_fp16 = matmul(transpose_x = qk_85_transpose_x_0, transpose_y = qk_85_transpose_y_0, x = transpose_313, y = transpose_314)[name = string("qk_85_cast_fp16")];
+            int32 concat_323_values1_0 = const()[name = string("concat_323_values1_0"), val = int32(448)];
+            int32 concat_323_axis_0 = const()[name = string("concat_323_axis_0"), val = int32(0)];
+            bool concat_323_interleave_0 = const()[name = string("concat_323_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_323 = concat(axis = concat_323_axis_0, interleave = concat_323_interleave_0, values = (gather_170_cast_uint16_to_int32, concat_323_values1_0))[name = string("concat_323")];
+            tensor<int32, [2]> var_3221_begin_0 = const()[name = string("op_3221_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3221_end_mask_0 = const()[name = string("op_3221_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3221_cast_fp16 = slice_by_index(begin = var_3221_begin_0, end = concat_323, end_mask = var_3221_end_mask_0, x = mask_to_fp16)[name = string("op_3221_cast_fp16")];
+            int32 concat_324_values0_0 = const()[name = string("concat_324_values0_0"), val = int32(0)];
+            int32 concat_324_axis_0 = const()[name = string("concat_324_axis_0"), val = int32(0)];
+            bool concat_324_interleave_0 = const()[name = string("concat_324_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_324 = concat(axis = concat_324_axis_0, interleave = concat_324_interleave_0, values = (concat_324_values0_0, gather_170_cast_uint16_to_int32))[name = string("concat_324")];
+            tensor<int32, [2]> var_3222_begin_0 = const()[name = string("op_3222_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3222_end_mask_0 = const()[name = string("op_3222_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3222_cast_fp16 = slice_by_index(begin = var_3222_begin_0, end = concat_324, end_mask = var_3222_end_mask_0, x = var_3221_cast_fp16)[name = string("op_3222_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_87_cast_fp16 = add(x = qk_85_cast_fp16, y = var_3222_cast_fp16)[name = string("qk_87_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3225_cast_fp16 = softmax(axis = var_3134, x = qk_87_cast_fp16)[name = string("op_3225_cast_fp16")];
+            bool var_3227_transpose_x_0 = const()[name = string("op_3227_transpose_x_0"), val = bool(false)];
+            bool var_3227_transpose_y_0 = const()[name = string("op_3227_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_145_cast_fp16 = transpose(perm = var_3218, x = var_3217_cast_fp16)[name = string("transpose_528")];
+            tensor<fp16, [1, 20, ?, 64]> var_3227_cast_fp16 = matmul(transpose_x = var_3227_transpose_x_0, transpose_y = var_3227_transpose_y_0, x = var_3225_cast_fp16, y = v_145_cast_fp16)[name = string("op_3227_cast_fp16")];
+            tensor<int32, [4]> var_3228 = const()[name = string("op_3228"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_325x = const()[name = string("concat_325x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3229_cast_fp16 = transpose(perm = var_3228, x = var_3227_cast_fp16)[name = string("transpose_525")];
+            tensor<fp16, [1, ?, 1280]> x_259_cast_fp16 = reshape(shape = concat_325x, x = var_3229_cast_fp16)[name = string("x_259_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3233_to_fp16 = const()[name = string("op_3233_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790854336)))];
+            tensor<fp16, [1280]> var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794131200)))];
+            tensor<fp16, [1, ?, 1280]> linear_115_cast_fp16 = linear(bias = var_3234_to_fp16, weight = var_3233_to_fp16, x = x_259_cast_fp16)[name = string("linear_115_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_261_cast_fp16 = add(x = x_255_cast_fp16, y = linear_115_cast_fp16)[name = string("x_261_cast_fp16")];
+            tensor<int32, [1]> var_3241_axes_0 = const()[name = string("op_3241_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794133824)))];
+            tensor<fp16, [1280]> blocks_14_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794136448)))];
+            tensor<fp16, [1, ?, 1280]> var_3241_cast_fp16 = layer_norm(axes = var_3241_axes_0, beta = blocks_14_cross_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_cross_attn_ln_weight_to_fp16, x = x_261_cast_fp16)[name = string("op_3241_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3250_to_fp16 = const()[name = string("op_3250_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794139072)))];
+            tensor<fp16, [1280]> var_3251_to_fp16 = const()[name = string("op_3251_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797415936)))];
+            tensor<fp16, [1, ?, 1280]> linear_116_cast_fp16 = linear(bias = var_3251_to_fp16, weight = var_3250_to_fp16, x = var_3241_cast_fp16)[name = string("linear_116_cast_fp16")];
+            tensor<int32, [3]> concat_326 = const()[name = string("concat_326"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_327 = const()[name = string("concat_327"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_147_internal_tensor_assign_1_stride_0 = const()[name = string("k_147_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_326, begin_mask = k_147_internal_tensor_assign_1_begin_mask_0, end = concat_327, end_mask = k_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_147_internal_tensor_assign_1_squeeze_mask_0, stride = k_147_internal_tensor_assign_1_stride_0, update = k_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("k_147_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_328 = const()[name = string("concat_328"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_329 = const()[name = string("concat_329"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_147_internal_tensor_assign_1_stride_0 = const()[name = string("v_147_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_328, begin_mask = v_147_internal_tensor_assign_1_begin_mask_0, end = concat_329, end_mask = v_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_147_internal_tensor_assign_1_squeeze_mask_0, stride = v_147_internal_tensor_assign_1_stride_0, update = v_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("v_147_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_330x = const()[name = string("concat_330x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3271_cast_fp16 = reshape(shape = concat_330x, x = linear_116_cast_fp16)[name = string("op_3271_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_218_to_fp16 = const()[name = string("const_218_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_119_cast_fp16 = mul(x = var_3271_cast_fp16, y = const_218_to_fp16)[name = string("q_119_cast_fp16")];
+            tensor<int32, [4]> var_3277 = const()[name = string("op_3277"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3278_cast_fp16 = reshape(shape = var_3277, x = k_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3278_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_219_to_fp16 = const()[name = string("const_219_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_149_cast_fp16 = mul(x = var_3278_cast_fp16, y = const_219_to_fp16)[name = string("k_149_cast_fp16")];
+            tensor<int32, [4]> var_3284 = const()[name = string("op_3284"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3285_cast_fp16 = reshape(shape = var_3284, x = v_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3285_cast_fp16")];
+            tensor<int32, [4]> var_3286 = const()[name = string("op_3286"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_89_transpose_x_0 = const()[name = string("qk_89_transpose_x_0"), val = bool(false)];
+            bool qk_89_transpose_y_0 = const()[name = string("qk_89_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_315_perm_0 = const()[name = string("transpose_315_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_316_perm_0 = const()[name = string("transpose_316_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_316 = transpose(perm = transpose_316_perm_0, x = k_149_cast_fp16)[name = string("transpose_522")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_315 = transpose(perm = transpose_315_perm_0, x = q_119_cast_fp16)[name = string("transpose_523")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_89_cast_fp16 = matmul(transpose_x = qk_89_transpose_x_0, transpose_y = qk_89_transpose_y_0, x = transpose_315, y = transpose_316)[name = string("qk_89_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3290_cast_fp16 = softmax(axis = var_3134, x = qk_89_cast_fp16)[name = string("op_3290_cast_fp16")];
+            bool var_3292_transpose_x_0 = const()[name = string("op_3292_transpose_x_0"), val = bool(false)];
+            bool var_3292_transpose_y_0 = const()[name = string("op_3292_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_149_cast_fp16 = transpose(perm = var_3286, x = var_3285_cast_fp16)[name = string("transpose_524")];
+            tensor<fp16, [1, 20, ?, 64]> var_3292_cast_fp16 = matmul(transpose_x = var_3292_transpose_x_0, transpose_y = var_3292_transpose_y_0, x = var_3290_cast_fp16, y = v_149_cast_fp16)[name = string("op_3292_cast_fp16")];
+            tensor<int32, [4]> var_3293 = const()[name = string("op_3293"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_331x = const()[name = string("concat_331x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3294_cast_fp16 = transpose(perm = var_3293, x = var_3292_cast_fp16)[name = string("transpose_521")];
+            tensor<fp16, [1, ?, 1280]> x_265_cast_fp16 = reshape(shape = concat_331x, x = var_3294_cast_fp16)[name = string("x_265_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3298_to_fp16 = const()[name = string("op_3298_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797418560)))];
+            tensor<fp16, [1280]> var_3299_to_fp16 = const()[name = string("op_3299_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800695424)))];
+            tensor<fp16, [1, ?, 1280]> linear_117_cast_fp16 = linear(bias = var_3299_to_fp16, weight = var_3298_to_fp16, x = x_265_cast_fp16)[name = string("linear_117_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_267_cast_fp16 = add(x = x_261_cast_fp16, y = linear_117_cast_fp16)[name = string("x_267_cast_fp16")];
+            tensor<int32, [1]> var_3306_axes_0 = const()[name = string("op_3306_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800698048)))];
+            tensor<fp16, [1280]> blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800700672)))];
+            tensor<fp16, [1, ?, 1280]> var_3306_cast_fp16 = layer_norm(axes = var_3306_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_267_cast_fp16)[name = string("op_3306_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3315_to_fp16 = const()[name = string("op_3315_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800703296)))];
+            tensor<fp16, [5120]> var_3316_to_fp16 = const()[name = string("op_3316_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813810560)))];
+            tensor<fp16, [1, ?, 5120]> linear_118_cast_fp16 = linear(bias = var_3316_to_fp16, weight = var_3315_to_fp16, x = var_3306_cast_fp16)[name = string("linear_118_cast_fp16")];
+            string x_271_mode_0 = const()[name = string("x_271_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_271_cast_fp16 = gelu(mode = x_271_mode_0, x = linear_118_cast_fp16)[name = string("x_271_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3321_to_fp16 = const()[name = string("op_3321_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813820864)))];
+            tensor<fp16, [1280]> var_3322_to_fp16 = const()[name = string("op_3322_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826928128)))];
+            tensor<fp16, [1, ?, 1280]> linear_119_cast_fp16 = linear(bias = var_3322_to_fp16, weight = var_3321_to_fp16, x = x_271_cast_fp16)[name = string("linear_119_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_273_cast_fp16 = add(x = x_267_cast_fp16, y = linear_119_cast_fp16)[name = string("x_273_cast_fp16")];
+            tensor<int32, [4]> k_cache_61_begin_0 = const()[name = string("k_cache_61_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_61_end_0 = const()[name = string("k_cache_61_end_0"), val = tensor<int32, [4]>([16, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_61_end_mask_0 = const()[name = string("k_cache_61_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_61_squeeze_mask_0 = const()[name = string("k_cache_61_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_61_cast_fp16 = slice_by_index(begin = k_cache_61_begin_0, end = k_cache_61_end_0, end_mask = k_cache_61_end_mask_0, squeeze_mask = k_cache_61_squeeze_mask_0, x = coreml_update_state_92)[name = string("k_cache_61_cast_fp16")];
+            tensor<int32, [4]> v_cache_61_begin_0 = const()[name = string("v_cache_61_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_61_end_0 = const()[name = string("v_cache_61_end_0"), val = tensor<int32, [4]>([16, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_61_end_mask_0 = const()[name = string("v_cache_61_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_61_squeeze_mask_0 = const()[name = string("v_cache_61_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_61_cast_fp16 = slice_by_index(begin = v_cache_61_begin_0, end = v_cache_61_end_0, end_mask = v_cache_61_end_mask_0, squeeze_mask = v_cache_61_squeeze_mask_0, x = coreml_update_state_93)[name = string("v_cache_61_cast_fp16")];
+            tensor<int32, [4]> k_cache_63_begin_0 = const()[name = string("k_cache_63_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_63_end_0 = const()[name = string("k_cache_63_end_0"), val = tensor<int32, [4]>([16, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_63_end_mask_0 = const()[name = string("k_cache_63_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_63_squeeze_mask_0 = const()[name = string("k_cache_63_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_63_cast_fp16 = slice_by_index(begin = k_cache_63_begin_0, end = k_cache_63_end_0, end_mask = k_cache_63_end_mask_0, squeeze_mask = k_cache_63_squeeze_mask_0, x = read_state_2)[name = string("k_cache_63_cast_fp16")];
+            tensor<int32, [4]> v_cache_63_begin_0 = const()[name = string("v_cache_63_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_63_end_0 = const()[name = string("v_cache_63_end_0"), val = tensor<int32, [4]>([16, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_63_end_mask_0 = const()[name = string("v_cache_63_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_63_squeeze_mask_0 = const()[name = string("v_cache_63_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_63_cast_fp16 = slice_by_index(begin = v_cache_63_begin_0, end = v_cache_63_end_0, end_mask = v_cache_63_end_mask_0, squeeze_mask = v_cache_63_squeeze_mask_0, x = read_state_3)[name = string("v_cache_63_cast_fp16")];
+            int32 var_3345 = const()[name = string("op_3345"), val = int32(-1)];
+            tensor<int32, [1]> var_3363_axes_0 = const()[name = string("op_3363_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826930752)))];
+            tensor<fp16, [1280]> blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826933376)))];
+            fp16 var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3363_cast_fp16 = layer_norm(axes = var_3363_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_273_cast_fp16)[name = string("op_3363_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3374_to_fp16 = const()[name = string("op_3374_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826936000)))];
+            tensor<fp16, [1280]> var_3375_to_fp16 = const()[name = string("op_3375_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830212864)))];
+            tensor<fp16, [1, ?, 1280]> linear_120_cast_fp16 = linear(bias = var_3375_to_fp16, weight = var_3374_to_fp16, x = var_3363_cast_fp16)[name = string("linear_120_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3378_to_fp16 = const()[name = string("op_3378_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830215488)))];
+            tensor<fp16, [1, ?, 1280]> linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3378_to_fp16, x = var_3363_cast_fp16)[name = string("linear_121_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3382_to_fp16 = const()[name = string("op_3382_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833492352)))];
+            tensor<fp16, [1280]> var_3383_to_fp16 = const()[name = string("op_3383_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836769216)))];
+            tensor<fp16, [1, ?, 1280]> linear_122_cast_fp16 = linear(bias = var_3383_to_fp16, weight = var_3382_to_fp16, x = var_3363_cast_fp16)[name = string("linear_122_cast_fp16")];
+            tensor<int32, [3]> var_3385_shape_cast_fp16 = shape(x = linear_120_cast_fp16)[name = string("op_3385_shape_cast_fp16")];
+            int32 gather_182_axis_0 = const()[name = string("gather_182_axis_0"), val = int32(0)];
+            int32 gather_182_batch_dims_0 = const()[name = string("gather_182_batch_dims_0"), val = int32(0)];
+            bool gather_182_validate_indices_0 = const()[name = string("gather_182_validate_indices_0"), val = bool(false)];
+            string var_3385_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3385_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_182_to_uint16 = const()[name = string("select_182_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3385_shape_cast_fp16_to_uint16 = cast(dtype = var_3385_shape_cast_fp16_to_uint16_dtype_0, x = var_3385_shape_cast_fp16)[name = string("cast_360")];
+            uint16 gather_182_cast_uint16 = gather(axis = gather_182_axis_0, batch_dims = gather_182_batch_dims_0, indices = select_182_to_uint16, validate_indices = gather_182_validate_indices_0, x = var_3385_shape_cast_fp16_to_uint16)[name = string("gather_182_cast_uint16")];
+            string gather_182_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_182_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_182_cast_uint16_to_int32 = cast(dtype = gather_182_cast_uint16_to_int32_dtype_0, x = gather_182_cast_uint16)[name = string("cast_359")];
+            int32 end_step_33 = add(x = offset, y = gather_182_cast_uint16_to_int32)[name = string("end_step_33")];
+            tensor<int32, [1]> expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = end_step_33)[name = string("expand_dims_243")];
+            tensor<int32, [1]> concat_334_values0_0 = const()[name = string("concat_334_values0_0"), val = tensor<int32, [1]>([15])];
+            int32 concat_334_axis_0 = const()[name = string("concat_334_axis_0"), val = int32(0)];
+            bool concat_334_interleave_0 = const()[name = string("concat_334_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_334 = concat(axis = concat_334_axis_0, interleave = concat_334_interleave_0, values = (concat_334_values0_0, expand_dims_240, expand_dims_1, expand_dims_242))[name = string("concat_334")];
+            tensor<int32, [1]> concat_335_values0_0 = const()[name = string("concat_335_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_335_values1_0 = const()[name = string("concat_335_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_335_values3_0 = const()[name = string("concat_335_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_335_axis_0 = const()[name = string("concat_335_axis_0"), val = int32(0)];
+            bool concat_335_interleave_0 = const()[name = string("concat_335_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_335 = concat(axis = concat_335_axis_0, interleave = concat_335_interleave_0, values = (concat_335_values0_0, concat_335_values1_0, expand_dims_243, concat_335_values3_0))[name = string("concat_335")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = k_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = k_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_16_stride_0, update = linear_121_cast_fp16, x = coreml_update_state_92)[name = string("k_cache1_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_16_cast_fp16, input = k_cache1)[name = string("coreml_update_state_94_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_94 = read_state(input = k_cache1)[name = string("coreml_update_state_94")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = v_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = v_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_16_stride_0, update = linear_122_cast_fp16, x = coreml_update_state_93)[name = string("v_cache1_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_16_cast_fp16, input = v_cache1)[name = string("coreml_update_state_95_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_95 = read_state(input = v_cache1)[name = string("coreml_update_state_95")];
+            int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)];
+            int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(1280)];
+            int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)];
+            bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, end_step_33, concat_340_values2_0))[name = string("concat_340")];
+            tensor<int32, [3]> var_3401_begin_0 = const()[name = string("op_3401_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3401_end_mask_0 = const()[name = string("op_3401_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3401_cast_fp16 = slice_by_index(begin = var_3401_begin_0, end = concat_340, end_mask = var_3401_end_mask_0, x = k_cache_61_cast_fp16)[name = string("op_3401_cast_fp16")];
+            tensor<int32, [3]> var_3404_begin_0 = const()[name = string("op_3404_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3404_end_mask_0 = const()[name = string("op_3404_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3404_cast_fp16 = slice_by_index(begin = var_3404_begin_0, end = concat_340, end_mask = var_3404_end_mask_0, x = v_cache_61_cast_fp16)[name = string("op_3404_cast_fp16")];
+            tensor<int32, [4]> concat_342x = const()[name = string("concat_342x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3414_cast_fp16 = reshape(shape = concat_342x, x = linear_120_cast_fp16)[name = string("op_3414_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_220_to_fp16 = const()[name = string("const_220_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_123_cast_fp16 = mul(x = var_3414_cast_fp16, y = const_220_to_fp16)[name = string("q_123_cast_fp16")];
+            tensor<int32, [4]> concat_343x = const()[name = string("concat_343x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3421_cast_fp16 = reshape(shape = concat_343x, x = var_3401_cast_fp16)[name = string("op_3421_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_221_to_fp16 = const()[name = string("const_221_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_155_cast_fp16 = mul(x = var_3421_cast_fp16, y = const_221_to_fp16)[name = string("k_155_cast_fp16")];
+            tensor<int32, [4]> concat_344x = const()[name = string("concat_344x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3428_cast_fp16 = reshape(shape = concat_344x, x = var_3404_cast_fp16)[name = string("op_3428_cast_fp16")];
+            tensor<int32, [4]> var_3429 = const()[name = string("op_3429"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_91_transpose_x_0 = const()[name = string("qk_91_transpose_x_0"), val = bool(false)];
+            bool qk_91_transpose_y_0 = const()[name = string("qk_91_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_317_perm_0 = const()[name = string("transpose_317_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_318_perm_0 = const()[name = string("transpose_318_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_318 = transpose(perm = transpose_318_perm_0, x = k_155_cast_fp16)[name = string("transpose_518")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_317 = transpose(perm = transpose_317_perm_0, x = q_123_cast_fp16)[name = string("transpose_519")];
+            tensor<fp16, [1, 20, ?, ?]> qk_91_cast_fp16 = matmul(transpose_x = qk_91_transpose_x_0, transpose_y = qk_91_transpose_y_0, x = transpose_317, y = transpose_318)[name = string("qk_91_cast_fp16")];
+            int32 concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = int32(448)];
+            int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)];
+            bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (gather_182_cast_uint16_to_int32, concat_345_values1_0))[name = string("concat_345")];
+            tensor<int32, [2]> var_3432_begin_0 = const()[name = string("op_3432_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3432_end_mask_0 = const()[name = string("op_3432_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3432_cast_fp16 = slice_by_index(begin = var_3432_begin_0, end = concat_345, end_mask = var_3432_end_mask_0, x = mask_to_fp16)[name = string("op_3432_cast_fp16")];
+            int32 concat_346_values0_0 = const()[name = string("concat_346_values0_0"), val = int32(0)];
+            int32 concat_346_axis_0 = const()[name = string("concat_346_axis_0"), val = int32(0)];
+            bool concat_346_interleave_0 = const()[name = string("concat_346_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_346 = concat(axis = concat_346_axis_0, interleave = concat_346_interleave_0, values = (concat_346_values0_0, gather_182_cast_uint16_to_int32))[name = string("concat_346")];
+            tensor<int32, [2]> var_3433_begin_0 = const()[name = string("op_3433_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3433_end_mask_0 = const()[name = string("op_3433_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3433_cast_fp16 = slice_by_index(begin = var_3433_begin_0, end = concat_346, end_mask = var_3433_end_mask_0, x = var_3432_cast_fp16)[name = string("op_3433_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_93_cast_fp16 = add(x = qk_91_cast_fp16, y = var_3433_cast_fp16)[name = string("qk_93_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3436_cast_fp16 = softmax(axis = var_3345, x = qk_93_cast_fp16)[name = string("op_3436_cast_fp16")];
+            bool var_3438_transpose_x_0 = const()[name = string("op_3438_transpose_x_0"), val = bool(false)];
+            bool var_3438_transpose_y_0 = const()[name = string("op_3438_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_155_cast_fp16 = transpose(perm = var_3429, x = var_3428_cast_fp16)[name = string("transpose_520")];
+            tensor<fp16, [1, 20, ?, 64]> var_3438_cast_fp16 = matmul(transpose_x = var_3438_transpose_x_0, transpose_y = var_3438_transpose_y_0, x = var_3436_cast_fp16, y = v_155_cast_fp16)[name = string("op_3438_cast_fp16")];
+            tensor<int32, [4]> var_3439 = const()[name = string("op_3439"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_347x = const()[name = string("concat_347x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3440_cast_fp16 = transpose(perm = var_3439, x = var_3438_cast_fp16)[name = string("transpose_517")];
+            tensor<fp16, [1, ?, 1280]> x_277_cast_fp16 = reshape(shape = concat_347x, x = var_3440_cast_fp16)[name = string("x_277_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3444_to_fp16 = const()[name = string("op_3444_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836771840)))];
+            tensor<fp16, [1280]> var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840048704)))];
+            tensor<fp16, [1, ?, 1280]> linear_123_cast_fp16 = linear(bias = var_3445_to_fp16, weight = var_3444_to_fp16, x = x_277_cast_fp16)[name = string("linear_123_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_279_cast_fp16 = add(x = x_273_cast_fp16, y = linear_123_cast_fp16)[name = string("x_279_cast_fp16")];
+            tensor<int32, [1]> var_3452_axes_0 = const()[name = string("op_3452_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840051328)))];
+            tensor<fp16, [1280]> blocks_15_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840053952)))];
+            tensor<fp16, [1, ?, 1280]> var_3452_cast_fp16 = layer_norm(axes = var_3452_axes_0, beta = blocks_15_cross_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_cross_attn_ln_weight_to_fp16, x = x_279_cast_fp16)[name = string("op_3452_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3461_to_fp16 = const()[name = string("op_3461_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840056576)))];
+            tensor<fp16, [1280]> var_3462_to_fp16 = const()[name = string("op_3462_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843333440)))];
+            tensor<fp16, [1, ?, 1280]> linear_124_cast_fp16 = linear(bias = var_3462_to_fp16, weight = var_3461_to_fp16, x = var_3452_cast_fp16)[name = string("linear_124_cast_fp16")];
+            tensor<int32, [3]> concat_348 = const()[name = string("concat_348"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_349 = const()[name = string("concat_349"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_157_internal_tensor_assign_1_stride_0 = const()[name = string("k_157_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_348, begin_mask = k_157_internal_tensor_assign_1_begin_mask_0, end = concat_349, end_mask = k_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_157_internal_tensor_assign_1_squeeze_mask_0, stride = k_157_internal_tensor_assign_1_stride_0, update = k_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("k_157_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_350 = const()[name = string("concat_350"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_351 = const()[name = string("concat_351"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_157_internal_tensor_assign_1_stride_0 = const()[name = string("v_157_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_350, begin_mask = v_157_internal_tensor_assign_1_begin_mask_0, end = concat_351, end_mask = v_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_157_internal_tensor_assign_1_squeeze_mask_0, stride = v_157_internal_tensor_assign_1_stride_0, update = v_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("v_157_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_352x = const()[name = string("concat_352x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3482_cast_fp16 = reshape(shape = concat_352x, x = linear_124_cast_fp16)[name = string("op_3482_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_222_to_fp16 = const()[name = string("const_222_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_127_cast_fp16 = mul(x = var_3482_cast_fp16, y = const_222_to_fp16)[name = string("q_127_cast_fp16")];
+            tensor<int32, [4]> var_3488 = const()[name = string("op_3488"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3489_cast_fp16 = reshape(shape = var_3488, x = k_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3489_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_223_to_fp16 = const()[name = string("const_223_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_159_cast_fp16 = mul(x = var_3489_cast_fp16, y = const_223_to_fp16)[name = string("k_159_cast_fp16")];
+            tensor<int32, [4]> var_3495 = const()[name = string("op_3495"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3496_cast_fp16 = reshape(shape = var_3495, x = v_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3496_cast_fp16")];
+            tensor<int32, [4]> var_3497 = const()[name = string("op_3497"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_95_transpose_x_0 = const()[name = string("qk_95_transpose_x_0"), val = bool(false)];
+            bool qk_95_transpose_y_0 = const()[name = string("qk_95_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_319_perm_0 = const()[name = string("transpose_319_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_320_perm_0 = const()[name = string("transpose_320_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_320 = transpose(perm = transpose_320_perm_0, x = k_159_cast_fp16)[name = string("transpose_514")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_319 = transpose(perm = transpose_319_perm_0, x = q_127_cast_fp16)[name = string("transpose_515")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_95_cast_fp16 = matmul(transpose_x = qk_95_transpose_x_0, transpose_y = qk_95_transpose_y_0, x = transpose_319, y = transpose_320)[name = string("qk_95_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3501_cast_fp16 = softmax(axis = var_3345, x = qk_95_cast_fp16)[name = string("op_3501_cast_fp16")];
+            bool var_3503_transpose_x_0 = const()[name = string("op_3503_transpose_x_0"), val = bool(false)];
+            bool var_3503_transpose_y_0 = const()[name = string("op_3503_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_159_cast_fp16 = transpose(perm = var_3497, x = var_3496_cast_fp16)[name = string("transpose_516")];
+            tensor<fp16, [1, 20, ?, 64]> var_3503_cast_fp16 = matmul(transpose_x = var_3503_transpose_x_0, transpose_y = var_3503_transpose_y_0, x = var_3501_cast_fp16, y = v_159_cast_fp16)[name = string("op_3503_cast_fp16")];
+            tensor<int32, [4]> var_3504 = const()[name = string("op_3504"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_353x = const()[name = string("concat_353x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3505_cast_fp16 = transpose(perm = var_3504, x = var_3503_cast_fp16)[name = string("transpose_513")];
+            tensor<fp16, [1, ?, 1280]> x_283_cast_fp16 = reshape(shape = concat_353x, x = var_3505_cast_fp16)[name = string("x_283_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3509_to_fp16 = const()[name = string("op_3509_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843336064)))];
+            tensor<fp16, [1280]> var_3510_to_fp16 = const()[name = string("op_3510_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846612928)))];
+            tensor<fp16, [1, ?, 1280]> linear_125_cast_fp16 = linear(bias = var_3510_to_fp16, weight = var_3509_to_fp16, x = x_283_cast_fp16)[name = string("linear_125_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_125_cast_fp16)[name = string("x_285_cast_fp16")];
+            tensor<int32, [1]> var_3517_axes_0 = const()[name = string("op_3517_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846615552)))];
+            tensor<fp16, [1280]> blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846618176)))];
+            tensor<fp16, [1, ?, 1280]> var_3517_cast_fp16 = layer_norm(axes = var_3517_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_285_cast_fp16)[name = string("op_3517_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3526_to_fp16 = const()[name = string("op_3526_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846620800)))];
+            tensor<fp16, [5120]> var_3527_to_fp16 = const()[name = string("op_3527_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859728064)))];
+            tensor<fp16, [1, ?, 5120]> linear_126_cast_fp16 = linear(bias = var_3527_to_fp16, weight = var_3526_to_fp16, x = var_3517_cast_fp16)[name = string("linear_126_cast_fp16")];
+            string x_289_mode_0 = const()[name = string("x_289_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_289_cast_fp16 = gelu(mode = x_289_mode_0, x = linear_126_cast_fp16)[name = string("x_289_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3532_to_fp16 = const()[name = string("op_3532_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859738368)))];
+            tensor<fp16, [1280]> var_3533_to_fp16 = const()[name = string("op_3533_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872845632)))];
+            tensor<fp16, [1, ?, 1280]> linear_127_cast_fp16 = linear(bias = var_3533_to_fp16, weight = var_3532_to_fp16, x = x_289_cast_fp16)[name = string("linear_127_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_291_cast_fp16 = add(x = x_285_cast_fp16, y = linear_127_cast_fp16)[name = string("x_291_cast_fp16")];
+            tensor<int32, [4]> k_cache_65_begin_0 = const()[name = string("k_cache_65_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_65_end_0 = const()[name = string("k_cache_65_end_0"), val = tensor<int32, [4]>([17, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_65_end_mask_0 = const()[name = string("k_cache_65_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_65_squeeze_mask_0 = const()[name = string("k_cache_65_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_65_cast_fp16 = slice_by_index(begin = k_cache_65_begin_0, end = k_cache_65_end_0, end_mask = k_cache_65_end_mask_0, squeeze_mask = k_cache_65_squeeze_mask_0, x = coreml_update_state_94)[name = string("k_cache_65_cast_fp16")];
+            tensor<int32, [4]> v_cache_65_begin_0 = const()[name = string("v_cache_65_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_65_end_0 = const()[name = string("v_cache_65_end_0"), val = tensor<int32, [4]>([17, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_65_end_mask_0 = const()[name = string("v_cache_65_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_65_squeeze_mask_0 = const()[name = string("v_cache_65_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_65_cast_fp16 = slice_by_index(begin = v_cache_65_begin_0, end = v_cache_65_end_0, end_mask = v_cache_65_end_mask_0, squeeze_mask = v_cache_65_squeeze_mask_0, x = coreml_update_state_95)[name = string("v_cache_65_cast_fp16")];
+            tensor<int32, [4]> k_cache_67_begin_0 = const()[name = string("k_cache_67_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_67_end_0 = const()[name = string("k_cache_67_end_0"), val = tensor<int32, [4]>([17, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_67_end_mask_0 = const()[name = string("k_cache_67_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_67_squeeze_mask_0 = const()[name = string("k_cache_67_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_67_cast_fp16 = slice_by_index(begin = k_cache_67_begin_0, end = k_cache_67_end_0, end_mask = k_cache_67_end_mask_0, squeeze_mask = k_cache_67_squeeze_mask_0, x = read_state_2)[name = string("k_cache_67_cast_fp16")];
+            tensor<int32, [4]> v_cache_67_begin_0 = const()[name = string("v_cache_67_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_67_end_0 = const()[name = string("v_cache_67_end_0"), val = tensor<int32, [4]>([17, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_67_end_mask_0 = const()[name = string("v_cache_67_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_67_squeeze_mask_0 = const()[name = string("v_cache_67_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_67_cast_fp16 = slice_by_index(begin = v_cache_67_begin_0, end = v_cache_67_end_0, end_mask = v_cache_67_end_mask_0, squeeze_mask = v_cache_67_squeeze_mask_0, x = read_state_3)[name = string("v_cache_67_cast_fp16")];
+            int32 var_3556 = const()[name = string("op_3556"), val = int32(-1)];
+            tensor<int32, [1]> var_3574_axes_0 = const()[name = string("op_3574_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872848256)))];
+            tensor<fp16, [1280]> blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872850880)))];
+            fp16 var_3562_to_fp16 = const()[name = string("op_3562_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3574_cast_fp16 = layer_norm(axes = var_3574_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_291_cast_fp16)[name = string("op_3574_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3585_to_fp16 = const()[name = string("op_3585_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872853504)))];
+            tensor<fp16, [1280]> var_3586_to_fp16 = const()[name = string("op_3586_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876130368)))];
+            tensor<fp16, [1, ?, 1280]> linear_128_cast_fp16 = linear(bias = var_3586_to_fp16, weight = var_3585_to_fp16, x = var_3574_cast_fp16)[name = string("linear_128_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3589_to_fp16 = const()[name = string("op_3589_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876132992)))];
+            tensor<fp16, [1, ?, 1280]> linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3589_to_fp16, x = var_3574_cast_fp16)[name = string("linear_129_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3593_to_fp16 = const()[name = string("op_3593_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(879409856)))];
+            tensor<fp16, [1280]> var_3594_to_fp16 = const()[name = string("op_3594_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882686720)))];
+            tensor<fp16, [1, ?, 1280]> linear_130_cast_fp16 = linear(bias = var_3594_to_fp16, weight = var_3593_to_fp16, x = var_3574_cast_fp16)[name = string("linear_130_cast_fp16")];
+            tensor<int32, [3]> var_3596_shape_cast_fp16 = shape(x = linear_128_cast_fp16)[name = string("op_3596_shape_cast_fp16")];
+            int32 gather_194_axis_0 = const()[name = string("gather_194_axis_0"), val = int32(0)];
+            int32 gather_194_batch_dims_0 = const()[name = string("gather_194_batch_dims_0"), val = int32(0)];
+            bool gather_194_validate_indices_0 = const()[name = string("gather_194_validate_indices_0"), val = bool(false)];
+            string var_3596_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3596_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_194_to_uint16 = const()[name = string("select_194_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3596_shape_cast_fp16_to_uint16 = cast(dtype = var_3596_shape_cast_fp16_to_uint16_dtype_0, x = var_3596_shape_cast_fp16)[name = string("cast_358")];
+            uint16 gather_194_cast_uint16 = gather(axis = gather_194_axis_0, batch_dims = gather_194_batch_dims_0, indices = select_194_to_uint16, validate_indices = gather_194_validate_indices_0, x = var_3596_shape_cast_fp16_to_uint16)[name = string("gather_194_cast_uint16")];
+            string gather_194_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_194_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_194_cast_uint16_to_int32 = cast(dtype = gather_194_cast_uint16_to_int32_dtype_0, x = gather_194_cast_uint16)[name = string("cast_357")];
+            int32 end_step_35 = add(x = offset, y = gather_194_cast_uint16_to_int32)[name = string("end_step_35")];
+            tensor<int32, [1]> expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = end_step_35)[name = string("expand_dims_259")];
+            tensor<int32, [1]> concat_356_values0_0 = const()[name = string("concat_356_values0_0"), val = tensor<int32, [1]>([16])];
+            int32 concat_356_axis_0 = const()[name = string("concat_356_axis_0"), val = int32(0)];
+            bool concat_356_interleave_0 = const()[name = string("concat_356_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_356 = concat(axis = concat_356_axis_0, interleave = concat_356_interleave_0, values = (concat_356_values0_0, expand_dims_256, expand_dims_1, expand_dims_258))[name = string("concat_356")];
+            tensor<int32, [1]> concat_357_values0_0 = const()[name = string("concat_357_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_357_values1_0 = const()[name = string("concat_357_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_357_values3_0 = const()[name = string("concat_357_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)];
+            bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (concat_357_values0_0, concat_357_values1_0, expand_dims_259, concat_357_values3_0))[name = string("concat_357")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = k_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = k_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_17_stride_0, update = linear_129_cast_fp16, x = coreml_update_state_94)[name = string("k_cache1_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_17_cast_fp16, input = k_cache1)[name = string("coreml_update_state_96_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_96 = read_state(input = k_cache1)[name = string("coreml_update_state_96")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = v_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = v_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_17_stride_0, update = linear_130_cast_fp16, x = coreml_update_state_95)[name = string("v_cache1_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_17_cast_fp16, input = v_cache1)[name = string("coreml_update_state_97_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_97 = read_state(input = v_cache1)[name = string("coreml_update_state_97")];
+            int32 concat_362_values0_0 = const()[name = string("concat_362_values0_0"), val = int32(1)];
+            int32 concat_362_values2_0 = const()[name = string("concat_362_values2_0"), val = int32(1280)];
+            int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)];
+            bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (concat_362_values0_0, end_step_35, concat_362_values2_0))[name = string("concat_362")];
+            tensor<int32, [3]> var_3612_begin_0 = const()[name = string("op_3612_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3612_end_mask_0 = const()[name = string("op_3612_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3612_cast_fp16 = slice_by_index(begin = var_3612_begin_0, end = concat_362, end_mask = var_3612_end_mask_0, x = k_cache_65_cast_fp16)[name = string("op_3612_cast_fp16")];
+            tensor<int32, [3]> var_3615_begin_0 = const()[name = string("op_3615_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3615_end_mask_0 = const()[name = string("op_3615_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3615_cast_fp16 = slice_by_index(begin = var_3615_begin_0, end = concat_362, end_mask = var_3615_end_mask_0, x = v_cache_65_cast_fp16)[name = string("op_3615_cast_fp16")];
+            tensor<int32, [4]> concat_364x = const()[name = string("concat_364x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3625_cast_fp16 = reshape(shape = concat_364x, x = linear_128_cast_fp16)[name = string("op_3625_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_131_cast_fp16 = mul(x = var_3625_cast_fp16, y = const_224_to_fp16)[name = string("q_131_cast_fp16")];
+            tensor<int32, [4]> concat_365x = const()[name = string("concat_365x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3632_cast_fp16 = reshape(shape = concat_365x, x = var_3612_cast_fp16)[name = string("op_3632_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_165_cast_fp16 = mul(x = var_3632_cast_fp16, y = const_225_to_fp16)[name = string("k_165_cast_fp16")];
+            tensor<int32, [4]> concat_366x = const()[name = string("concat_366x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3639_cast_fp16 = reshape(shape = concat_366x, x = var_3615_cast_fp16)[name = string("op_3639_cast_fp16")];
+            tensor<int32, [4]> var_3640 = const()[name = string("op_3640"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_97_transpose_x_0 = const()[name = string("qk_97_transpose_x_0"), val = bool(false)];
+            bool qk_97_transpose_y_0 = const()[name = string("qk_97_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_321_perm_0 = const()[name = string("transpose_321_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_322_perm_0 = const()[name = string("transpose_322_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_322 = transpose(perm = transpose_322_perm_0, x = k_165_cast_fp16)[name = string("transpose_510")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_321 = transpose(perm = transpose_321_perm_0, x = q_131_cast_fp16)[name = string("transpose_511")];
+            tensor<fp16, [1, 20, ?, ?]> qk_97_cast_fp16 = matmul(transpose_x = qk_97_transpose_x_0, transpose_y = qk_97_transpose_y_0, x = transpose_321, y = transpose_322)[name = string("qk_97_cast_fp16")];
+            int32 concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = int32(448)];
+            int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)];
+            bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (gather_194_cast_uint16_to_int32, concat_367_values1_0))[name = string("concat_367")];
+            tensor<int32, [2]> var_3643_begin_0 = const()[name = string("op_3643_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3643_end_mask_0 = const()[name = string("op_3643_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3643_cast_fp16 = slice_by_index(begin = var_3643_begin_0, end = concat_367, end_mask = var_3643_end_mask_0, x = mask_to_fp16)[name = string("op_3643_cast_fp16")];
+            int32 concat_368_values0_0 = const()[name = string("concat_368_values0_0"), val = int32(0)];
+            int32 concat_368_axis_0 = const()[name = string("concat_368_axis_0"), val = int32(0)];
+            bool concat_368_interleave_0 = const()[name = string("concat_368_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_368 = concat(axis = concat_368_axis_0, interleave = concat_368_interleave_0, values = (concat_368_values0_0, gather_194_cast_uint16_to_int32))[name = string("concat_368")];
+            tensor<int32, [2]> var_3644_begin_0 = const()[name = string("op_3644_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3644_end_mask_0 = const()[name = string("op_3644_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3644_cast_fp16 = slice_by_index(begin = var_3644_begin_0, end = concat_368, end_mask = var_3644_end_mask_0, x = var_3643_cast_fp16)[name = string("op_3644_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_99_cast_fp16 = add(x = qk_97_cast_fp16, y = var_3644_cast_fp16)[name = string("qk_99_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3647_cast_fp16 = softmax(axis = var_3556, x = qk_99_cast_fp16)[name = string("op_3647_cast_fp16")];
+            bool var_3649_transpose_x_0 = const()[name = string("op_3649_transpose_x_0"), val = bool(false)];
+            bool var_3649_transpose_y_0 = const()[name = string("op_3649_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_165_cast_fp16 = transpose(perm = var_3640, x = var_3639_cast_fp16)[name = string("transpose_512")];
+            tensor<fp16, [1, 20, ?, 64]> var_3649_cast_fp16 = matmul(transpose_x = var_3649_transpose_x_0, transpose_y = var_3649_transpose_y_0, x = var_3647_cast_fp16, y = v_165_cast_fp16)[name = string("op_3649_cast_fp16")];
+            tensor<int32, [4]> var_3650 = const()[name = string("op_3650"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_369x = const()[name = string("concat_369x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3651_cast_fp16 = transpose(perm = var_3650, x = var_3649_cast_fp16)[name = string("transpose_509")];
+            tensor<fp16, [1, ?, 1280]> x_295_cast_fp16 = reshape(shape = concat_369x, x = var_3651_cast_fp16)[name = string("x_295_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3655_to_fp16 = const()[name = string("op_3655_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882689344)))];
+            tensor<fp16, [1280]> var_3656_to_fp16 = const()[name = string("op_3656_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885966208)))];
+            tensor<fp16, [1, ?, 1280]> linear_131_cast_fp16 = linear(bias = var_3656_to_fp16, weight = var_3655_to_fp16, x = x_295_cast_fp16)[name = string("linear_131_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_297_cast_fp16 = add(x = x_291_cast_fp16, y = linear_131_cast_fp16)[name = string("x_297_cast_fp16")];
+            tensor<int32, [1]> var_3663_axes_0 = const()[name = string("op_3663_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885968832)))];
+            tensor<fp16, [1280]> blocks_16_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885971456)))];
+            tensor<fp16, [1, ?, 1280]> var_3663_cast_fp16 = layer_norm(axes = var_3663_axes_0, beta = blocks_16_cross_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_cross_attn_ln_weight_to_fp16, x = x_297_cast_fp16)[name = string("op_3663_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3672_to_fp16 = const()[name = string("op_3672_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885974080)))];
+            tensor<fp16, [1280]> var_3673_to_fp16 = const()[name = string("op_3673_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889250944)))];
+            tensor<fp16, [1, ?, 1280]> linear_132_cast_fp16 = linear(bias = var_3673_to_fp16, weight = var_3672_to_fp16, x = var_3663_cast_fp16)[name = string("linear_132_cast_fp16")];
+            tensor<int32, [3]> concat_370 = const()[name = string("concat_370"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_371 = const()[name = string("concat_371"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_167_internal_tensor_assign_1_stride_0 = const()[name = string("k_167_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_370, begin_mask = k_167_internal_tensor_assign_1_begin_mask_0, end = concat_371, end_mask = k_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_167_internal_tensor_assign_1_squeeze_mask_0, stride = k_167_internal_tensor_assign_1_stride_0, update = k_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("k_167_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_372 = const()[name = string("concat_372"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_373 = const()[name = string("concat_373"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_167_internal_tensor_assign_1_stride_0 = const()[name = string("v_167_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_372, begin_mask = v_167_internal_tensor_assign_1_begin_mask_0, end = concat_373, end_mask = v_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_167_internal_tensor_assign_1_squeeze_mask_0, stride = v_167_internal_tensor_assign_1_stride_0, update = v_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("v_167_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_374x = const()[name = string("concat_374x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3693_cast_fp16 = reshape(shape = concat_374x, x = linear_132_cast_fp16)[name = string("op_3693_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_226_to_fp16 = const()[name = string("const_226_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_135_cast_fp16 = mul(x = var_3693_cast_fp16, y = const_226_to_fp16)[name = string("q_135_cast_fp16")];
+            tensor<int32, [4]> var_3699 = const()[name = string("op_3699"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3700_cast_fp16 = reshape(shape = var_3699, x = k_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3700_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_169_cast_fp16 = mul(x = var_3700_cast_fp16, y = const_227_to_fp16)[name = string("k_169_cast_fp16")];
+            tensor<int32, [4]> var_3706 = const()[name = string("op_3706"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3707_cast_fp16 = reshape(shape = var_3706, x = v_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3707_cast_fp16")];
+            tensor<int32, [4]> var_3708 = const()[name = string("op_3708"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_101_transpose_x_0 = const()[name = string("qk_101_transpose_x_0"), val = bool(false)];
+            bool qk_101_transpose_y_0 = const()[name = string("qk_101_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_323_perm_0 = const()[name = string("transpose_323_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_324_perm_0 = const()[name = string("transpose_324_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_324 = transpose(perm = transpose_324_perm_0, x = k_169_cast_fp16)[name = string("transpose_506")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_323 = transpose(perm = transpose_323_perm_0, x = q_135_cast_fp16)[name = string("transpose_507")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_101_cast_fp16 = matmul(transpose_x = qk_101_transpose_x_0, transpose_y = qk_101_transpose_y_0, x = transpose_323, y = transpose_324)[name = string("qk_101_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3712_cast_fp16 = softmax(axis = var_3556, x = qk_101_cast_fp16)[name = string("op_3712_cast_fp16")];
+            bool var_3714_transpose_x_0 = const()[name = string("op_3714_transpose_x_0"), val = bool(false)];
+            bool var_3714_transpose_y_0 = const()[name = string("op_3714_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_169_cast_fp16 = transpose(perm = var_3708, x = var_3707_cast_fp16)[name = string("transpose_508")];
+            tensor<fp16, [1, 20, ?, 64]> var_3714_cast_fp16 = matmul(transpose_x = var_3714_transpose_x_0, transpose_y = var_3714_transpose_y_0, x = var_3712_cast_fp16, y = v_169_cast_fp16)[name = string("op_3714_cast_fp16")];
+            tensor<int32, [4]> var_3715 = const()[name = string("op_3715"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_375x = const()[name = string("concat_375x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3716_cast_fp16 = transpose(perm = var_3715, x = var_3714_cast_fp16)[name = string("transpose_505")];
+            tensor<fp16, [1, ?, 1280]> x_301_cast_fp16 = reshape(shape = concat_375x, x = var_3716_cast_fp16)[name = string("x_301_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3720_to_fp16 = const()[name = string("op_3720_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889253568)))];
+            tensor<fp16, [1280]> var_3721_to_fp16 = const()[name = string("op_3721_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892530432)))];
+            tensor<fp16, [1, ?, 1280]> linear_133_cast_fp16 = linear(bias = var_3721_to_fp16, weight = var_3720_to_fp16, x = x_301_cast_fp16)[name = string("linear_133_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_303_cast_fp16 = add(x = x_297_cast_fp16, y = linear_133_cast_fp16)[name = string("x_303_cast_fp16")];
+            tensor<int32, [1]> var_3728_axes_0 = const()[name = string("op_3728_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892533056)))];
+            tensor<fp16, [1280]> blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892535680)))];
+            tensor<fp16, [1, ?, 1280]> var_3728_cast_fp16 = layer_norm(axes = var_3728_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_303_cast_fp16)[name = string("op_3728_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3737_to_fp16 = const()[name = string("op_3737_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892538304)))];
+            tensor<fp16, [5120]> var_3738_to_fp16 = const()[name = string("op_3738_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905645568)))];
+            tensor<fp16, [1, ?, 5120]> linear_134_cast_fp16 = linear(bias = var_3738_to_fp16, weight = var_3737_to_fp16, x = var_3728_cast_fp16)[name = string("linear_134_cast_fp16")];
+            string x_307_mode_0 = const()[name = string("x_307_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_307_cast_fp16 = gelu(mode = x_307_mode_0, x = linear_134_cast_fp16)[name = string("x_307_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3743_to_fp16 = const()[name = string("op_3743_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905655872)))];
+            tensor<fp16, [1280]> var_3744_to_fp16 = const()[name = string("op_3744_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918763136)))];
+            tensor<fp16, [1, ?, 1280]> linear_135_cast_fp16 = linear(bias = var_3744_to_fp16, weight = var_3743_to_fp16, x = x_307_cast_fp16)[name = string("linear_135_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_309_cast_fp16 = add(x = x_303_cast_fp16, y = linear_135_cast_fp16)[name = string("x_309_cast_fp16")];
+            tensor<int32, [4]> k_cache_69_begin_0 = const()[name = string("k_cache_69_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_69_end_0 = const()[name = string("k_cache_69_end_0"), val = tensor<int32, [4]>([18, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_69_end_mask_0 = const()[name = string("k_cache_69_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_69_squeeze_mask_0 = const()[name = string("k_cache_69_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_69_cast_fp16 = slice_by_index(begin = k_cache_69_begin_0, end = k_cache_69_end_0, end_mask = k_cache_69_end_mask_0, squeeze_mask = k_cache_69_squeeze_mask_0, x = coreml_update_state_96)[name = string("k_cache_69_cast_fp16")];
+            tensor<int32, [4]> v_cache_69_begin_0 = const()[name = string("v_cache_69_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_69_end_0 = const()[name = string("v_cache_69_end_0"), val = tensor<int32, [4]>([18, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_69_end_mask_0 = const()[name = string("v_cache_69_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_69_squeeze_mask_0 = const()[name = string("v_cache_69_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_69_cast_fp16 = slice_by_index(begin = v_cache_69_begin_0, end = v_cache_69_end_0, end_mask = v_cache_69_end_mask_0, squeeze_mask = v_cache_69_squeeze_mask_0, x = coreml_update_state_97)[name = string("v_cache_69_cast_fp16")];
+            tensor<int32, [4]> k_cache_71_begin_0 = const()[name = string("k_cache_71_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_71_end_0 = const()[name = string("k_cache_71_end_0"), val = tensor<int32, [4]>([18, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_71_end_mask_0 = const()[name = string("k_cache_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_71_squeeze_mask_0 = const()[name = string("k_cache_71_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_71_cast_fp16 = slice_by_index(begin = k_cache_71_begin_0, end = k_cache_71_end_0, end_mask = k_cache_71_end_mask_0, squeeze_mask = k_cache_71_squeeze_mask_0, x = read_state_2)[name = string("k_cache_71_cast_fp16")];
+            tensor<int32, [4]> v_cache_71_begin_0 = const()[name = string("v_cache_71_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_71_end_0 = const()[name = string("v_cache_71_end_0"), val = tensor<int32, [4]>([18, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_71_end_mask_0 = const()[name = string("v_cache_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_71_squeeze_mask_0 = const()[name = string("v_cache_71_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_71_cast_fp16 = slice_by_index(begin = v_cache_71_begin_0, end = v_cache_71_end_0, end_mask = v_cache_71_end_mask_0, squeeze_mask = v_cache_71_squeeze_mask_0, x = read_state_3)[name = string("v_cache_71_cast_fp16")];
+            int32 var_3767 = const()[name = string("op_3767"), val = int32(-1)];
+            tensor<int32, [1]> var_3785_axes_0 = const()[name = string("op_3785_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918765760)))];
+            tensor<fp16, [1280]> blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918768384)))];
+            fp16 var_3773_to_fp16 = const()[name = string("op_3773_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3785_cast_fp16 = layer_norm(axes = var_3785_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_309_cast_fp16)[name = string("op_3785_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3796_to_fp16 = const()[name = string("op_3796_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918771008)))];
+            tensor<fp16, [1280]> var_3797_to_fp16 = const()[name = string("op_3797_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922047872)))];
+            tensor<fp16, [1, ?, 1280]> linear_136_cast_fp16 = linear(bias = var_3797_to_fp16, weight = var_3796_to_fp16, x = var_3785_cast_fp16)[name = string("linear_136_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3800_to_fp16 = const()[name = string("op_3800_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922050496)))];
+            tensor<fp16, [1, ?, 1280]> linear_137_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3800_to_fp16, x = var_3785_cast_fp16)[name = string("linear_137_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3804_to_fp16 = const()[name = string("op_3804_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(925327360)))];
+            tensor<fp16, [1280]> var_3805_to_fp16 = const()[name = string("op_3805_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928604224)))];
+            tensor<fp16, [1, ?, 1280]> linear_138_cast_fp16 = linear(bias = var_3805_to_fp16, weight = var_3804_to_fp16, x = var_3785_cast_fp16)[name = string("linear_138_cast_fp16")];
+            tensor<int32, [3]> var_3807_shape_cast_fp16 = shape(x = linear_136_cast_fp16)[name = string("op_3807_shape_cast_fp16")];
+            int32 gather_206_axis_0 = const()[name = string("gather_206_axis_0"), val = int32(0)];
+            int32 gather_206_batch_dims_0 = const()[name = string("gather_206_batch_dims_0"), val = int32(0)];
+            bool gather_206_validate_indices_0 = const()[name = string("gather_206_validate_indices_0"), val = bool(false)];
+            string var_3807_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3807_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_206_to_uint16 = const()[name = string("select_206_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3807_shape_cast_fp16_to_uint16 = cast(dtype = var_3807_shape_cast_fp16_to_uint16_dtype_0, x = var_3807_shape_cast_fp16)[name = string("cast_356")];
+            uint16 gather_206_cast_uint16 = gather(axis = gather_206_axis_0, batch_dims = gather_206_batch_dims_0, indices = select_206_to_uint16, validate_indices = gather_206_validate_indices_0, x = var_3807_shape_cast_fp16_to_uint16)[name = string("gather_206_cast_uint16")];
+            string gather_206_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_206_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_206_cast_uint16_to_int32 = cast(dtype = gather_206_cast_uint16_to_int32_dtype_0, x = gather_206_cast_uint16)[name = string("cast_355")];
+            int32 end_step_37 = add(x = offset, y = gather_206_cast_uint16_to_int32)[name = string("end_step_37")];
+            tensor<int32, [1]> expand_dims_272 = const()[name = string("expand_dims_272"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_275_axes_0 = const()[name = string("expand_dims_275_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_275 = expand_dims(axes = expand_dims_275_axes_0, x = end_step_37)[name = string("expand_dims_275")];
+            tensor<int32, [1]> concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = tensor<int32, [1]>([17])];
+            int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)];
+            bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, expand_dims_272, expand_dims_1, expand_dims_274))[name = string("concat_378")];
+            tensor<int32, [1]> concat_379_values0_0 = const()[name = string("concat_379_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_379_values1_0 = const()[name = string("concat_379_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_379_values3_0 = const()[name = string("concat_379_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)];
+            bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (concat_379_values0_0, concat_379_values1_0, expand_dims_275, concat_379_values3_0))[name = string("concat_379")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = k_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = k_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_18_stride_0, update = linear_137_cast_fp16, x = coreml_update_state_96)[name = string("k_cache1_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_18_cast_fp16, input = k_cache1)[name = string("coreml_update_state_98_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_98 = read_state(input = k_cache1)[name = string("coreml_update_state_98")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = v_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = v_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_18_stride_0, update = linear_138_cast_fp16, x = coreml_update_state_97)[name = string("v_cache1_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_18_cast_fp16, input = v_cache1)[name = string("coreml_update_state_99_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_99 = read_state(input = v_cache1)[name = string("coreml_update_state_99")];
+            int32 concat_384_values0_0 = const()[name = string("concat_384_values0_0"), val = int32(1)];
+            int32 concat_384_values2_0 = const()[name = string("concat_384_values2_0"), val = int32(1280)];
+            int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)];
+            bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (concat_384_values0_0, end_step_37, concat_384_values2_0))[name = string("concat_384")];
+            tensor<int32, [3]> var_3823_begin_0 = const()[name = string("op_3823_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3823_end_mask_0 = const()[name = string("op_3823_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3823_cast_fp16 = slice_by_index(begin = var_3823_begin_0, end = concat_384, end_mask = var_3823_end_mask_0, x = k_cache_69_cast_fp16)[name = string("op_3823_cast_fp16")];
+            tensor<int32, [3]> var_3826_begin_0 = const()[name = string("op_3826_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3826_end_mask_0 = const()[name = string("op_3826_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_3826_cast_fp16 = slice_by_index(begin = var_3826_begin_0, end = concat_384, end_mask = var_3826_end_mask_0, x = v_cache_69_cast_fp16)[name = string("op_3826_cast_fp16")];
+            tensor<int32, [4]> concat_386x = const()[name = string("concat_386x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3836_cast_fp16 = reshape(shape = concat_386x, x = linear_136_cast_fp16)[name = string("op_3836_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_228_to_fp16 = const()[name = string("const_228_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_139_cast_fp16 = mul(x = var_3836_cast_fp16, y = const_228_to_fp16)[name = string("q_139_cast_fp16")];
+            tensor<int32, [4]> concat_387x = const()[name = string("concat_387x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3843_cast_fp16 = reshape(shape = concat_387x, x = var_3823_cast_fp16)[name = string("op_3843_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_175_cast_fp16 = mul(x = var_3843_cast_fp16, y = const_229_to_fp16)[name = string("k_175_cast_fp16")];
+            tensor<int32, [4]> concat_388x = const()[name = string("concat_388x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3850_cast_fp16 = reshape(shape = concat_388x, x = var_3826_cast_fp16)[name = string("op_3850_cast_fp16")];
+            tensor<int32, [4]> var_3851 = const()[name = string("op_3851"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_103_transpose_x_0 = const()[name = string("qk_103_transpose_x_0"), val = bool(false)];
+            bool qk_103_transpose_y_0 = const()[name = string("qk_103_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_325_perm_0 = const()[name = string("transpose_325_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_326_perm_0 = const()[name = string("transpose_326_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_326 = transpose(perm = transpose_326_perm_0, x = k_175_cast_fp16)[name = string("transpose_502")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_325 = transpose(perm = transpose_325_perm_0, x = q_139_cast_fp16)[name = string("transpose_503")];
+            tensor<fp16, [1, 20, ?, ?]> qk_103_cast_fp16 = matmul(transpose_x = qk_103_transpose_x_0, transpose_y = qk_103_transpose_y_0, x = transpose_325, y = transpose_326)[name = string("qk_103_cast_fp16")];
+            int32 concat_389_values1_0 = const()[name = string("concat_389_values1_0"), val = int32(448)];
+            int32 concat_389_axis_0 = const()[name = string("concat_389_axis_0"), val = int32(0)];
+            bool concat_389_interleave_0 = const()[name = string("concat_389_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_389 = concat(axis = concat_389_axis_0, interleave = concat_389_interleave_0, values = (gather_206_cast_uint16_to_int32, concat_389_values1_0))[name = string("concat_389")];
+            tensor<int32, [2]> var_3854_begin_0 = const()[name = string("op_3854_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3854_end_mask_0 = const()[name = string("op_3854_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3854_cast_fp16 = slice_by_index(begin = var_3854_begin_0, end = concat_389, end_mask = var_3854_end_mask_0, x = mask_to_fp16)[name = string("op_3854_cast_fp16")];
+            int32 concat_390_values0_0 = const()[name = string("concat_390_values0_0"), val = int32(0)];
+            int32 concat_390_axis_0 = const()[name = string("concat_390_axis_0"), val = int32(0)];
+            bool concat_390_interleave_0 = const()[name = string("concat_390_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_390 = concat(axis = concat_390_axis_0, interleave = concat_390_interleave_0, values = (concat_390_values0_0, gather_206_cast_uint16_to_int32))[name = string("concat_390")];
+            tensor<int32, [2]> var_3855_begin_0 = const()[name = string("op_3855_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3855_end_mask_0 = const()[name = string("op_3855_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3855_cast_fp16 = slice_by_index(begin = var_3855_begin_0, end = concat_390, end_mask = var_3855_end_mask_0, x = var_3854_cast_fp16)[name = string("op_3855_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_105_cast_fp16 = add(x = qk_103_cast_fp16, y = var_3855_cast_fp16)[name = string("qk_105_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_3858_cast_fp16 = softmax(axis = var_3767, x = qk_105_cast_fp16)[name = string("op_3858_cast_fp16")];
+            bool var_3860_transpose_x_0 = const()[name = string("op_3860_transpose_x_0"), val = bool(false)];
+            bool var_3860_transpose_y_0 = const()[name = string("op_3860_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_175_cast_fp16 = transpose(perm = var_3851, x = var_3850_cast_fp16)[name = string("transpose_504")];
+            tensor<fp16, [1, 20, ?, 64]> var_3860_cast_fp16 = matmul(transpose_x = var_3860_transpose_x_0, transpose_y = var_3860_transpose_y_0, x = var_3858_cast_fp16, y = v_175_cast_fp16)[name = string("op_3860_cast_fp16")];
+            tensor<int32, [4]> var_3861 = const()[name = string("op_3861"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_391x = const()[name = string("concat_391x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3862_cast_fp16 = transpose(perm = var_3861, x = var_3860_cast_fp16)[name = string("transpose_501")];
+            tensor<fp16, [1, ?, 1280]> x_313_cast_fp16 = reshape(shape = concat_391x, x = var_3862_cast_fp16)[name = string("x_313_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3866_to_fp16 = const()[name = string("op_3866_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928606848)))];
+            tensor<fp16, [1280]> var_3867_to_fp16 = const()[name = string("op_3867_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931883712)))];
+            tensor<fp16, [1, ?, 1280]> linear_139_cast_fp16 = linear(bias = var_3867_to_fp16, weight = var_3866_to_fp16, x = x_313_cast_fp16)[name = string("linear_139_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_315_cast_fp16 = add(x = x_309_cast_fp16, y = linear_139_cast_fp16)[name = string("x_315_cast_fp16")];
+            tensor<int32, [1]> var_3874_axes_0 = const()[name = string("op_3874_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931886336)))];
+            tensor<fp16, [1280]> blocks_17_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931888960)))];
+            tensor<fp16, [1, ?, 1280]> var_3874_cast_fp16 = layer_norm(axes = var_3874_axes_0, beta = blocks_17_cross_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_cross_attn_ln_weight_to_fp16, x = x_315_cast_fp16)[name = string("op_3874_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3883_to_fp16 = const()[name = string("op_3883_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931891584)))];
+            tensor<fp16, [1280]> var_3884_to_fp16 = const()[name = string("op_3884_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935168448)))];
+            tensor<fp16, [1, ?, 1280]> linear_140_cast_fp16 = linear(bias = var_3884_to_fp16, weight = var_3883_to_fp16, x = var_3874_cast_fp16)[name = string("linear_140_cast_fp16")];
+            tensor<int32, [3]> concat_392 = const()[name = string("concat_392"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_393 = const()[name = string("concat_393"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_177_internal_tensor_assign_1_stride_0 = const()[name = string("k_177_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_392, begin_mask = k_177_internal_tensor_assign_1_begin_mask_0, end = concat_393, end_mask = k_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_177_internal_tensor_assign_1_squeeze_mask_0, stride = k_177_internal_tensor_assign_1_stride_0, update = k_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("k_177_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_394 = const()[name = string("concat_394"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_395 = const()[name = string("concat_395"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_177_internal_tensor_assign_1_stride_0 = const()[name = string("v_177_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_394, begin_mask = v_177_internal_tensor_assign_1_begin_mask_0, end = concat_395, end_mask = v_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_177_internal_tensor_assign_1_squeeze_mask_0, stride = v_177_internal_tensor_assign_1_stride_0, update = v_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("v_177_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_396x = const()[name = string("concat_396x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_3904_cast_fp16 = reshape(shape = concat_396x, x = linear_140_cast_fp16)[name = string("op_3904_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_143_cast_fp16 = mul(x = var_3904_cast_fp16, y = const_230_to_fp16)[name = string("q_143_cast_fp16")];
+            tensor<int32, [4]> var_3910 = const()[name = string("op_3910"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3911_cast_fp16 = reshape(shape = var_3910, x = k_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3911_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_179_cast_fp16 = mul(x = var_3911_cast_fp16, y = const_231_to_fp16)[name = string("k_179_cast_fp16")];
+            tensor<int32, [4]> var_3917 = const()[name = string("op_3917"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3918_cast_fp16 = reshape(shape = var_3917, x = v_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3918_cast_fp16")];
+            tensor<int32, [4]> var_3919 = const()[name = string("op_3919"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_107_transpose_x_0 = const()[name = string("qk_107_transpose_x_0"), val = bool(false)];
+            bool qk_107_transpose_y_0 = const()[name = string("qk_107_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_327_perm_0 = const()[name = string("transpose_327_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_328_perm_0 = const()[name = string("transpose_328_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_328 = transpose(perm = transpose_328_perm_0, x = k_179_cast_fp16)[name = string("transpose_498")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_327 = transpose(perm = transpose_327_perm_0, x = q_143_cast_fp16)[name = string("transpose_499")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_107_cast_fp16 = matmul(transpose_x = qk_107_transpose_x_0, transpose_y = qk_107_transpose_y_0, x = transpose_327, y = transpose_328)[name = string("qk_107_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_3923_cast_fp16 = softmax(axis = var_3767, x = qk_107_cast_fp16)[name = string("op_3923_cast_fp16")];
+            bool var_3925_transpose_x_0 = const()[name = string("op_3925_transpose_x_0"), val = bool(false)];
+            bool var_3925_transpose_y_0 = const()[name = string("op_3925_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_179_cast_fp16 = transpose(perm = var_3919, x = var_3918_cast_fp16)[name = string("transpose_500")];
+            tensor<fp16, [1, 20, ?, 64]> var_3925_cast_fp16 = matmul(transpose_x = var_3925_transpose_x_0, transpose_y = var_3925_transpose_y_0, x = var_3923_cast_fp16, y = v_179_cast_fp16)[name = string("op_3925_cast_fp16")];
+            tensor<int32, [4]> var_3926 = const()[name = string("op_3926"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_397x = const()[name = string("concat_397x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_3927_cast_fp16 = transpose(perm = var_3926, x = var_3925_cast_fp16)[name = string("transpose_497")];
+            tensor<fp16, [1, ?, 1280]> x_319_cast_fp16 = reshape(shape = concat_397x, x = var_3927_cast_fp16)[name = string("x_319_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3931_to_fp16 = const()[name = string("op_3931_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935171072)))];
+            tensor<fp16, [1280]> var_3932_to_fp16 = const()[name = string("op_3932_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938447936)))];
+            tensor<fp16, [1, ?, 1280]> linear_141_cast_fp16 = linear(bias = var_3932_to_fp16, weight = var_3931_to_fp16, x = x_319_cast_fp16)[name = string("linear_141_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_321_cast_fp16 = add(x = x_315_cast_fp16, y = linear_141_cast_fp16)[name = string("x_321_cast_fp16")];
+            tensor<int32, [1]> var_3939_axes_0 = const()[name = string("op_3939_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938450560)))];
+            tensor<fp16, [1280]> blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938453184)))];
+            tensor<fp16, [1, ?, 1280]> var_3939_cast_fp16 = layer_norm(axes = var_3939_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_321_cast_fp16)[name = string("op_3939_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3948_to_fp16 = const()[name = string("op_3948_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938455808)))];
+            tensor<fp16, [5120]> var_3949_to_fp16 = const()[name = string("op_3949_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951563072)))];
+            tensor<fp16, [1, ?, 5120]> linear_142_cast_fp16 = linear(bias = var_3949_to_fp16, weight = var_3948_to_fp16, x = var_3939_cast_fp16)[name = string("linear_142_cast_fp16")];
+            string x_325_mode_0 = const()[name = string("x_325_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_325_cast_fp16 = gelu(mode = x_325_mode_0, x = linear_142_cast_fp16)[name = string("x_325_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3954_to_fp16 = const()[name = string("op_3954_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951573376)))];
+            tensor<fp16, [1280]> var_3955_to_fp16 = const()[name = string("op_3955_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964680640)))];
+            tensor<fp16, [1, ?, 1280]> linear_143_cast_fp16 = linear(bias = var_3955_to_fp16, weight = var_3954_to_fp16, x = x_325_cast_fp16)[name = string("linear_143_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_327_cast_fp16 = add(x = x_321_cast_fp16, y = linear_143_cast_fp16)[name = string("x_327_cast_fp16")];
+            tensor<int32, [4]> k_cache_73_begin_0 = const()[name = string("k_cache_73_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_73_end_0 = const()[name = string("k_cache_73_end_0"), val = tensor<int32, [4]>([19, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_73_end_mask_0 = const()[name = string("k_cache_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_73_squeeze_mask_0 = const()[name = string("k_cache_73_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_73_cast_fp16 = slice_by_index(begin = k_cache_73_begin_0, end = k_cache_73_end_0, end_mask = k_cache_73_end_mask_0, squeeze_mask = k_cache_73_squeeze_mask_0, x = coreml_update_state_98)[name = string("k_cache_73_cast_fp16")];
+            tensor<int32, [4]> v_cache_73_begin_0 = const()[name = string("v_cache_73_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_73_end_0 = const()[name = string("v_cache_73_end_0"), val = tensor<int32, [4]>([19, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_73_end_mask_0 = const()[name = string("v_cache_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_73_squeeze_mask_0 = const()[name = string("v_cache_73_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_73_cast_fp16 = slice_by_index(begin = v_cache_73_begin_0, end = v_cache_73_end_0, end_mask = v_cache_73_end_mask_0, squeeze_mask = v_cache_73_squeeze_mask_0, x = coreml_update_state_99)[name = string("v_cache_73_cast_fp16")];
+            tensor<int32, [4]> k_cache_75_begin_0 = const()[name = string("k_cache_75_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_75_end_0 = const()[name = string("k_cache_75_end_0"), val = tensor<int32, [4]>([19, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_75_end_mask_0 = const()[name = string("k_cache_75_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_75_squeeze_mask_0 = const()[name = string("k_cache_75_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_75_cast_fp16 = slice_by_index(begin = k_cache_75_begin_0, end = k_cache_75_end_0, end_mask = k_cache_75_end_mask_0, squeeze_mask = k_cache_75_squeeze_mask_0, x = read_state_2)[name = string("k_cache_75_cast_fp16")];
+            tensor<int32, [4]> v_cache_75_begin_0 = const()[name = string("v_cache_75_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_75_end_0 = const()[name = string("v_cache_75_end_0"), val = tensor<int32, [4]>([19, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_75_end_mask_0 = const()[name = string("v_cache_75_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_75_squeeze_mask_0 = const()[name = string("v_cache_75_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_75_cast_fp16 = slice_by_index(begin = v_cache_75_begin_0, end = v_cache_75_end_0, end_mask = v_cache_75_end_mask_0, squeeze_mask = v_cache_75_squeeze_mask_0, x = read_state_3)[name = string("v_cache_75_cast_fp16")];
+            int32 var_3978 = const()[name = string("op_3978"), val = int32(-1)];
+            tensor<int32, [1]> var_3996_axes_0 = const()[name = string("op_3996_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964683264)))];
+            tensor<fp16, [1280]> blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964685888)))];
+            fp16 var_3984_to_fp16 = const()[name = string("op_3984_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_3996_cast_fp16 = layer_norm(axes = var_3996_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_327_cast_fp16)[name = string("op_3996_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4007_to_fp16 = const()[name = string("op_4007_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964688512)))];
+            tensor<fp16, [1280]> var_4008_to_fp16 = const()[name = string("op_4008_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967965376)))];
+            tensor<fp16, [1, ?, 1280]> linear_144_cast_fp16 = linear(bias = var_4008_to_fp16, weight = var_4007_to_fp16, x = var_3996_cast_fp16)[name = string("linear_144_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4011_to_fp16 = const()[name = string("op_4011_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967968000)))];
+            tensor<fp16, [1, ?, 1280]> linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4011_to_fp16, x = var_3996_cast_fp16)[name = string("linear_145_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4015_to_fp16 = const()[name = string("op_4015_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971244864)))];
+            tensor<fp16, [1280]> var_4016_to_fp16 = const()[name = string("op_4016_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974521728)))];
+            tensor<fp16, [1, ?, 1280]> linear_146_cast_fp16 = linear(bias = var_4016_to_fp16, weight = var_4015_to_fp16, x = var_3996_cast_fp16)[name = string("linear_146_cast_fp16")];
+            tensor<int32, [3]> var_4018_shape_cast_fp16 = shape(x = linear_144_cast_fp16)[name = string("op_4018_shape_cast_fp16")];
+            int32 gather_218_axis_0 = const()[name = string("gather_218_axis_0"), val = int32(0)];
+            int32 gather_218_batch_dims_0 = const()[name = string("gather_218_batch_dims_0"), val = int32(0)];
+            bool gather_218_validate_indices_0 = const()[name = string("gather_218_validate_indices_0"), val = bool(false)];
+            string var_4018_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4018_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_218_to_uint16 = const()[name = string("select_218_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4018_shape_cast_fp16_to_uint16 = cast(dtype = var_4018_shape_cast_fp16_to_uint16_dtype_0, x = var_4018_shape_cast_fp16)[name = string("cast_354")];
+            uint16 gather_218_cast_uint16 = gather(axis = gather_218_axis_0, batch_dims = gather_218_batch_dims_0, indices = select_218_to_uint16, validate_indices = gather_218_validate_indices_0, x = var_4018_shape_cast_fp16_to_uint16)[name = string("gather_218_cast_uint16")];
+            string gather_218_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_218_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_218_cast_uint16_to_int32 = cast(dtype = gather_218_cast_uint16_to_int32_dtype_0, x = gather_218_cast_uint16)[name = string("cast_353")];
+            int32 end_step_39 = add(x = offset, y = gather_218_cast_uint16_to_int32)[name = string("end_step_39")];
+            tensor<int32, [1]> expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_290 = const()[name = string("expand_dims_290"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_291_axes_0 = const()[name = string("expand_dims_291_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_291 = expand_dims(axes = expand_dims_291_axes_0, x = end_step_39)[name = string("expand_dims_291")];
+            tensor<int32, [1]> concat_400_values0_0 = const()[name = string("concat_400_values0_0"), val = tensor<int32, [1]>([18])];
+            int32 concat_400_axis_0 = const()[name = string("concat_400_axis_0"), val = int32(0)];
+            bool concat_400_interleave_0 = const()[name = string("concat_400_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_400 = concat(axis = concat_400_axis_0, interleave = concat_400_interleave_0, values = (concat_400_values0_0, expand_dims_288, expand_dims_1, expand_dims_290))[name = string("concat_400")];
+            tensor<int32, [1]> concat_401_values0_0 = const()[name = string("concat_401_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_401_values1_0 = const()[name = string("concat_401_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_401_values3_0 = const()[name = string("concat_401_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_401_axis_0 = const()[name = string("concat_401_axis_0"), val = int32(0)];
+            bool concat_401_interleave_0 = const()[name = string("concat_401_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_401 = concat(axis = concat_401_axis_0, interleave = concat_401_interleave_0, values = (concat_401_values0_0, concat_401_values1_0, expand_dims_291, concat_401_values3_0))[name = string("concat_401")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = k_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = k_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_19_stride_0, update = linear_145_cast_fp16, x = coreml_update_state_98)[name = string("k_cache1_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_19_cast_fp16, input = k_cache1)[name = string("coreml_update_state_100_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_100 = read_state(input = k_cache1)[name = string("coreml_update_state_100")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = v_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = v_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_19_stride_0, update = linear_146_cast_fp16, x = coreml_update_state_99)[name = string("v_cache1_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_19_cast_fp16, input = v_cache1)[name = string("coreml_update_state_101_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_101 = read_state(input = v_cache1)[name = string("coreml_update_state_101")];
+            int32 concat_406_values0_0 = const()[name = string("concat_406_values0_0"), val = int32(1)];
+            int32 concat_406_values2_0 = const()[name = string("concat_406_values2_0"), val = int32(1280)];
+            int32 concat_406_axis_0 = const()[name = string("concat_406_axis_0"), val = int32(0)];
+            bool concat_406_interleave_0 = const()[name = string("concat_406_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_406 = concat(axis = concat_406_axis_0, interleave = concat_406_interleave_0, values = (concat_406_values0_0, end_step_39, concat_406_values2_0))[name = string("concat_406")];
+            tensor<int32, [3]> var_4034_begin_0 = const()[name = string("op_4034_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4034_end_mask_0 = const()[name = string("op_4034_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4034_cast_fp16 = slice_by_index(begin = var_4034_begin_0, end = concat_406, end_mask = var_4034_end_mask_0, x = k_cache_73_cast_fp16)[name = string("op_4034_cast_fp16")];
+            tensor<int32, [3]> var_4037_begin_0 = const()[name = string("op_4037_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4037_end_mask_0 = const()[name = string("op_4037_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4037_cast_fp16 = slice_by_index(begin = var_4037_begin_0, end = concat_406, end_mask = var_4037_end_mask_0, x = v_cache_73_cast_fp16)[name = string("op_4037_cast_fp16")];
+            tensor<int32, [4]> concat_408x = const()[name = string("concat_408x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4047_cast_fp16 = reshape(shape = concat_408x, x = linear_144_cast_fp16)[name = string("op_4047_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_147_cast_fp16 = mul(x = var_4047_cast_fp16, y = const_232_to_fp16)[name = string("q_147_cast_fp16")];
+            tensor<int32, [4]> concat_409x = const()[name = string("concat_409x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4054_cast_fp16 = reshape(shape = concat_409x, x = var_4034_cast_fp16)[name = string("op_4054_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_185_cast_fp16 = mul(x = var_4054_cast_fp16, y = const_233_to_fp16)[name = string("k_185_cast_fp16")];
+            tensor<int32, [4]> concat_410x = const()[name = string("concat_410x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4061_cast_fp16 = reshape(shape = concat_410x, x = var_4037_cast_fp16)[name = string("op_4061_cast_fp16")];
+            tensor<int32, [4]> var_4062 = const()[name = string("op_4062"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_109_transpose_x_0 = const()[name = string("qk_109_transpose_x_0"), val = bool(false)];
+            bool qk_109_transpose_y_0 = const()[name = string("qk_109_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_329_perm_0 = const()[name = string("transpose_329_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_330_perm_0 = const()[name = string("transpose_330_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_330 = transpose(perm = transpose_330_perm_0, x = k_185_cast_fp16)[name = string("transpose_494")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_329 = transpose(perm = transpose_329_perm_0, x = q_147_cast_fp16)[name = string("transpose_495")];
+            tensor<fp16, [1, 20, ?, ?]> qk_109_cast_fp16 = matmul(transpose_x = qk_109_transpose_x_0, transpose_y = qk_109_transpose_y_0, x = transpose_329, y = transpose_330)[name = string("qk_109_cast_fp16")];
+            int32 concat_411_values1_0 = const()[name = string("concat_411_values1_0"), val = int32(448)];
+            int32 concat_411_axis_0 = const()[name = string("concat_411_axis_0"), val = int32(0)];
+            bool concat_411_interleave_0 = const()[name = string("concat_411_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_411 = concat(axis = concat_411_axis_0, interleave = concat_411_interleave_0, values = (gather_218_cast_uint16_to_int32, concat_411_values1_0))[name = string("concat_411")];
+            tensor<int32, [2]> var_4065_begin_0 = const()[name = string("op_4065_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4065_end_mask_0 = const()[name = string("op_4065_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4065_cast_fp16 = slice_by_index(begin = var_4065_begin_0, end = concat_411, end_mask = var_4065_end_mask_0, x = mask_to_fp16)[name = string("op_4065_cast_fp16")];
+            int32 concat_412_values0_0 = const()[name = string("concat_412_values0_0"), val = int32(0)];
+            int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)];
+            bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (concat_412_values0_0, gather_218_cast_uint16_to_int32))[name = string("concat_412")];
+            tensor<int32, [2]> var_4066_begin_0 = const()[name = string("op_4066_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4066_end_mask_0 = const()[name = string("op_4066_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4066_cast_fp16 = slice_by_index(begin = var_4066_begin_0, end = concat_412, end_mask = var_4066_end_mask_0, x = var_4065_cast_fp16)[name = string("op_4066_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_111_cast_fp16 = add(x = qk_109_cast_fp16, y = var_4066_cast_fp16)[name = string("qk_111_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4069_cast_fp16 = softmax(axis = var_3978, x = qk_111_cast_fp16)[name = string("op_4069_cast_fp16")];
+            bool var_4071_transpose_x_0 = const()[name = string("op_4071_transpose_x_0"), val = bool(false)];
+            bool var_4071_transpose_y_0 = const()[name = string("op_4071_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_185_cast_fp16 = transpose(perm = var_4062, x = var_4061_cast_fp16)[name = string("transpose_496")];
+            tensor<fp16, [1, 20, ?, 64]> var_4071_cast_fp16 = matmul(transpose_x = var_4071_transpose_x_0, transpose_y = var_4071_transpose_y_0, x = var_4069_cast_fp16, y = v_185_cast_fp16)[name = string("op_4071_cast_fp16")];
+            tensor<int32, [4]> var_4072 = const()[name = string("op_4072"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_413x = const()[name = string("concat_413x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4073_cast_fp16 = transpose(perm = var_4072, x = var_4071_cast_fp16)[name = string("transpose_493")];
+            tensor<fp16, [1, ?, 1280]> x_331_cast_fp16 = reshape(shape = concat_413x, x = var_4073_cast_fp16)[name = string("x_331_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4077_to_fp16 = const()[name = string("op_4077_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974524352)))];
+            tensor<fp16, [1280]> var_4078_to_fp16 = const()[name = string("op_4078_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977801216)))];
+            tensor<fp16, [1, ?, 1280]> linear_147_cast_fp16 = linear(bias = var_4078_to_fp16, weight = var_4077_to_fp16, x = x_331_cast_fp16)[name = string("linear_147_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_333_cast_fp16 = add(x = x_327_cast_fp16, y = linear_147_cast_fp16)[name = string("x_333_cast_fp16")];
+            tensor<int32, [1]> var_4085_axes_0 = const()[name = string("op_4085_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977803840)))];
+            tensor<fp16, [1280]> blocks_18_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977806464)))];
+            tensor<fp16, [1, ?, 1280]> var_4085_cast_fp16 = layer_norm(axes = var_4085_axes_0, beta = blocks_18_cross_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_cross_attn_ln_weight_to_fp16, x = x_333_cast_fp16)[name = string("op_4085_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4094_to_fp16 = const()[name = string("op_4094_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977809088)))];
+            tensor<fp16, [1280]> var_4095_to_fp16 = const()[name = string("op_4095_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981085952)))];
+            tensor<fp16, [1, ?, 1280]> linear_148_cast_fp16 = linear(bias = var_4095_to_fp16, weight = var_4094_to_fp16, x = var_4085_cast_fp16)[name = string("linear_148_cast_fp16")];
+            tensor<int32, [3]> concat_414 = const()[name = string("concat_414"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_415 = const()[name = string("concat_415"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_187_internal_tensor_assign_1_stride_0 = const()[name = string("k_187_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_414, begin_mask = k_187_internal_tensor_assign_1_begin_mask_0, end = concat_415, end_mask = k_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_187_internal_tensor_assign_1_squeeze_mask_0, stride = k_187_internal_tensor_assign_1_stride_0, update = k_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("k_187_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_416 = const()[name = string("concat_416"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_417 = const()[name = string("concat_417"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_187_internal_tensor_assign_1_stride_0 = const()[name = string("v_187_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_416, begin_mask = v_187_internal_tensor_assign_1_begin_mask_0, end = concat_417, end_mask = v_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_187_internal_tensor_assign_1_squeeze_mask_0, stride = v_187_internal_tensor_assign_1_stride_0, update = v_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("v_187_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_418x = const()[name = string("concat_418x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4115_cast_fp16 = reshape(shape = concat_418x, x = linear_148_cast_fp16)[name = string("op_4115_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_151_cast_fp16 = mul(x = var_4115_cast_fp16, y = const_234_to_fp16)[name = string("q_151_cast_fp16")];
+            tensor<int32, [4]> var_4121 = const()[name = string("op_4121"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4122_cast_fp16 = reshape(shape = var_4121, x = k_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4122_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_189_cast_fp16 = mul(x = var_4122_cast_fp16, y = const_235_to_fp16)[name = string("k_189_cast_fp16")];
+            tensor<int32, [4]> var_4128 = const()[name = string("op_4128"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4129_cast_fp16 = reshape(shape = var_4128, x = v_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4129_cast_fp16")];
+            tensor<int32, [4]> var_4130 = const()[name = string("op_4130"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_113_transpose_x_0 = const()[name = string("qk_113_transpose_x_0"), val = bool(false)];
+            bool qk_113_transpose_y_0 = const()[name = string("qk_113_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_331_perm_0 = const()[name = string("transpose_331_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_332_perm_0 = const()[name = string("transpose_332_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_332 = transpose(perm = transpose_332_perm_0, x = k_189_cast_fp16)[name = string("transpose_490")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_331 = transpose(perm = transpose_331_perm_0, x = q_151_cast_fp16)[name = string("transpose_491")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_113_cast_fp16 = matmul(transpose_x = qk_113_transpose_x_0, transpose_y = qk_113_transpose_y_0, x = transpose_331, y = transpose_332)[name = string("qk_113_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4134_cast_fp16 = softmax(axis = var_3978, x = qk_113_cast_fp16)[name = string("op_4134_cast_fp16")];
+            bool var_4136_transpose_x_0 = const()[name = string("op_4136_transpose_x_0"), val = bool(false)];
+            bool var_4136_transpose_y_0 = const()[name = string("op_4136_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_189_cast_fp16 = transpose(perm = var_4130, x = var_4129_cast_fp16)[name = string("transpose_492")];
+            tensor<fp16, [1, 20, ?, 64]> var_4136_cast_fp16 = matmul(transpose_x = var_4136_transpose_x_0, transpose_y = var_4136_transpose_y_0, x = var_4134_cast_fp16, y = v_189_cast_fp16)[name = string("op_4136_cast_fp16")];
+            tensor<int32, [4]> var_4137 = const()[name = string("op_4137"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_419x = const()[name = string("concat_419x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4138_cast_fp16 = transpose(perm = var_4137, x = var_4136_cast_fp16)[name = string("transpose_489")];
+            tensor<fp16, [1, ?, 1280]> x_337_cast_fp16 = reshape(shape = concat_419x, x = var_4138_cast_fp16)[name = string("x_337_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4142_to_fp16 = const()[name = string("op_4142_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981088576)))];
+            tensor<fp16, [1280]> var_4143_to_fp16 = const()[name = string("op_4143_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984365440)))];
+            tensor<fp16, [1, ?, 1280]> linear_149_cast_fp16 = linear(bias = var_4143_to_fp16, weight = var_4142_to_fp16, x = x_337_cast_fp16)[name = string("linear_149_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_339_cast_fp16 = add(x = x_333_cast_fp16, y = linear_149_cast_fp16)[name = string("x_339_cast_fp16")];
+            tensor<int32, [1]> var_4150_axes_0 = const()[name = string("op_4150_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984368064)))];
+            tensor<fp16, [1280]> blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984370688)))];
+            tensor<fp16, [1, ?, 1280]> var_4150_cast_fp16 = layer_norm(axes = var_4150_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_339_cast_fp16)[name = string("op_4150_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_4159_to_fp16 = const()[name = string("op_4159_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984373312)))];
+            tensor<fp16, [5120]> var_4160_to_fp16 = const()[name = string("op_4160_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997480576)))];
+            tensor<fp16, [1, ?, 5120]> linear_150_cast_fp16 = linear(bias = var_4160_to_fp16, weight = var_4159_to_fp16, x = var_4150_cast_fp16)[name = string("linear_150_cast_fp16")];
+            string x_343_mode_0 = const()[name = string("x_343_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_343_cast_fp16 = gelu(mode = x_343_mode_0, x = linear_150_cast_fp16)[name = string("x_343_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_4165_to_fp16 = const()[name = string("op_4165_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997490880)))];
+            tensor<fp16, [1280]> var_4166_to_fp16 = const()[name = string("op_4166_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010598144)))];
+            tensor<fp16, [1, ?, 1280]> linear_151_cast_fp16 = linear(bias = var_4166_to_fp16, weight = var_4165_to_fp16, x = x_343_cast_fp16)[name = string("linear_151_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_345_cast_fp16 = add(x = x_339_cast_fp16, y = linear_151_cast_fp16)[name = string("x_345_cast_fp16")];
+            tensor<int32, [4]> k_cache_77_begin_0 = const()[name = string("k_cache_77_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_77_end_0 = const()[name = string("k_cache_77_end_0"), val = tensor<int32, [4]>([20, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_77_end_mask_0 = const()[name = string("k_cache_77_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_77_squeeze_mask_0 = const()[name = string("k_cache_77_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_77_cast_fp16 = slice_by_index(begin = k_cache_77_begin_0, end = k_cache_77_end_0, end_mask = k_cache_77_end_mask_0, squeeze_mask = k_cache_77_squeeze_mask_0, x = coreml_update_state_100)[name = string("k_cache_77_cast_fp16")];
+            tensor<int32, [4]> v_cache_77_begin_0 = const()[name = string("v_cache_77_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_77_end_0 = const()[name = string("v_cache_77_end_0"), val = tensor<int32, [4]>([20, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_77_end_mask_0 = const()[name = string("v_cache_77_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_77_squeeze_mask_0 = const()[name = string("v_cache_77_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_77_cast_fp16 = slice_by_index(begin = v_cache_77_begin_0, end = v_cache_77_end_0, end_mask = v_cache_77_end_mask_0, squeeze_mask = v_cache_77_squeeze_mask_0, x = coreml_update_state_101)[name = string("v_cache_77_cast_fp16")];
+            tensor<int32, [4]> k_cache_79_begin_0 = const()[name = string("k_cache_79_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_79_end_0 = const()[name = string("k_cache_79_end_0"), val = tensor<int32, [4]>([20, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_79_end_mask_0 = const()[name = string("k_cache_79_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_79_squeeze_mask_0 = const()[name = string("k_cache_79_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_79_cast_fp16 = slice_by_index(begin = k_cache_79_begin_0, end = k_cache_79_end_0, end_mask = k_cache_79_end_mask_0, squeeze_mask = k_cache_79_squeeze_mask_0, x = read_state_2)[name = string("k_cache_79_cast_fp16")];
+            tensor<int32, [4]> v_cache_79_begin_0 = const()[name = string("v_cache_79_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_79_end_0 = const()[name = string("v_cache_79_end_0"), val = tensor<int32, [4]>([20, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_79_end_mask_0 = const()[name = string("v_cache_79_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_79_squeeze_mask_0 = const()[name = string("v_cache_79_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_79_cast_fp16 = slice_by_index(begin = v_cache_79_begin_0, end = v_cache_79_end_0, end_mask = v_cache_79_end_mask_0, squeeze_mask = v_cache_79_squeeze_mask_0, x = read_state_3)[name = string("v_cache_79_cast_fp16")];
+            int32 var_4189 = const()[name = string("op_4189"), val = int32(-1)];
+            tensor<int32, [1]> var_4207_axes_0 = const()[name = string("op_4207_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010600768)))];
+            tensor<fp16, [1280]> blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010603392)))];
+            fp16 var_4195_to_fp16 = const()[name = string("op_4195_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_4207_cast_fp16 = layer_norm(axes = var_4207_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_345_cast_fp16)[name = string("op_4207_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4218_to_fp16 = const()[name = string("op_4218_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010606016)))];
+            tensor<fp16, [1280]> var_4219_to_fp16 = const()[name = string("op_4219_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013882880)))];
+            tensor<fp16, [1, ?, 1280]> linear_152_cast_fp16 = linear(bias = var_4219_to_fp16, weight = var_4218_to_fp16, x = var_4207_cast_fp16)[name = string("linear_152_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4222_to_fp16 = const()[name = string("op_4222_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013885504)))];
+            tensor<fp16, [1, ?, 1280]> linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4222_to_fp16, x = var_4207_cast_fp16)[name = string("linear_153_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4226_to_fp16 = const()[name = string("op_4226_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017162368)))];
+            tensor<fp16, [1280]> var_4227_to_fp16 = const()[name = string("op_4227_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020439232)))];
+            tensor<fp16, [1, ?, 1280]> linear_154_cast_fp16 = linear(bias = var_4227_to_fp16, weight = var_4226_to_fp16, x = var_4207_cast_fp16)[name = string("linear_154_cast_fp16")];
+            tensor<int32, [3]> var_4229_shape_cast_fp16 = shape(x = linear_152_cast_fp16)[name = string("op_4229_shape_cast_fp16")];
+            int32 gather_230_axis_0 = const()[name = string("gather_230_axis_0"), val = int32(0)];
+            int32 gather_230_batch_dims_0 = const()[name = string("gather_230_batch_dims_0"), val = int32(0)];
+            bool gather_230_validate_indices_0 = const()[name = string("gather_230_validate_indices_0"), val = bool(false)];
+            string var_4229_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4229_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_230_to_uint16 = const()[name = string("select_230_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4229_shape_cast_fp16_to_uint16 = cast(dtype = var_4229_shape_cast_fp16_to_uint16_dtype_0, x = var_4229_shape_cast_fp16)[name = string("cast_352")];
+            uint16 gather_230_cast_uint16 = gather(axis = gather_230_axis_0, batch_dims = gather_230_batch_dims_0, indices = select_230_to_uint16, validate_indices = gather_230_validate_indices_0, x = var_4229_shape_cast_fp16_to_uint16)[name = string("gather_230_cast_uint16")];
+            string gather_230_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_230_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_230_cast_uint16_to_int32 = cast(dtype = gather_230_cast_uint16_to_int32_dtype_0, x = gather_230_cast_uint16)[name = string("cast_351")];
+            int32 end_step_41 = add(x = offset, y = gather_230_cast_uint16_to_int32)[name = string("end_step_41")];
+            tensor<int32, [1]> expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_307_axes_0 = const()[name = string("expand_dims_307_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_307 = expand_dims(axes = expand_dims_307_axes_0, x = end_step_41)[name = string("expand_dims_307")];
+            tensor<int32, [1]> concat_422_values0_0 = const()[name = string("concat_422_values0_0"), val = tensor<int32, [1]>([19])];
+            int32 concat_422_axis_0 = const()[name = string("concat_422_axis_0"), val = int32(0)];
+            bool concat_422_interleave_0 = const()[name = string("concat_422_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_422 = concat(axis = concat_422_axis_0, interleave = concat_422_interleave_0, values = (concat_422_values0_0, expand_dims_304, expand_dims_1, expand_dims_306))[name = string("concat_422")];
+            tensor<int32, [1]> concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_423_values1_0 = const()[name = string("concat_423_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_423_values3_0 = const()[name = string("concat_423_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)];
+            bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, concat_423_values1_0, expand_dims_307, concat_423_values3_0))[name = string("concat_423")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = k_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = k_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_20_stride_0, update = linear_153_cast_fp16, x = coreml_update_state_100)[name = string("k_cache1_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_20_cast_fp16, input = k_cache1)[name = string("coreml_update_state_102_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_102 = read_state(input = k_cache1)[name = string("coreml_update_state_102")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = v_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = v_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_20_stride_0, update = linear_154_cast_fp16, x = coreml_update_state_101)[name = string("v_cache1_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_20_cast_fp16, input = v_cache1)[name = string("coreml_update_state_103_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_103 = read_state(input = v_cache1)[name = string("coreml_update_state_103")];
+            int32 concat_428_values0_0 = const()[name = string("concat_428_values0_0"), val = int32(1)];
+            int32 concat_428_values2_0 = const()[name = string("concat_428_values2_0"), val = int32(1280)];
+            int32 concat_428_axis_0 = const()[name = string("concat_428_axis_0"), val = int32(0)];
+            bool concat_428_interleave_0 = const()[name = string("concat_428_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_428 = concat(axis = concat_428_axis_0, interleave = concat_428_interleave_0, values = (concat_428_values0_0, end_step_41, concat_428_values2_0))[name = string("concat_428")];
+            tensor<int32, [3]> var_4245_begin_0 = const()[name = string("op_4245_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4245_end_mask_0 = const()[name = string("op_4245_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4245_cast_fp16 = slice_by_index(begin = var_4245_begin_0, end = concat_428, end_mask = var_4245_end_mask_0, x = k_cache_77_cast_fp16)[name = string("op_4245_cast_fp16")];
+            tensor<int32, [3]> var_4248_begin_0 = const()[name = string("op_4248_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4248_end_mask_0 = const()[name = string("op_4248_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4248_cast_fp16 = slice_by_index(begin = var_4248_begin_0, end = concat_428, end_mask = var_4248_end_mask_0, x = v_cache_77_cast_fp16)[name = string("op_4248_cast_fp16")];
+            tensor<int32, [4]> concat_430x = const()[name = string("concat_430x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4258_cast_fp16 = reshape(shape = concat_430x, x = linear_152_cast_fp16)[name = string("op_4258_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_236_to_fp16 = const()[name = string("const_236_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_155_cast_fp16 = mul(x = var_4258_cast_fp16, y = const_236_to_fp16)[name = string("q_155_cast_fp16")];
+            tensor<int32, [4]> concat_431x = const()[name = string("concat_431x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4265_cast_fp16 = reshape(shape = concat_431x, x = var_4245_cast_fp16)[name = string("op_4265_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_195_cast_fp16 = mul(x = var_4265_cast_fp16, y = const_237_to_fp16)[name = string("k_195_cast_fp16")];
+            tensor<int32, [4]> concat_432x = const()[name = string("concat_432x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4272_cast_fp16 = reshape(shape = concat_432x, x = var_4248_cast_fp16)[name = string("op_4272_cast_fp16")];
+            tensor<int32, [4]> var_4273 = const()[name = string("op_4273"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_115_transpose_x_0 = const()[name = string("qk_115_transpose_x_0"), val = bool(false)];
+            bool qk_115_transpose_y_0 = const()[name = string("qk_115_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_333_perm_0 = const()[name = string("transpose_333_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_334_perm_0 = const()[name = string("transpose_334_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_334 = transpose(perm = transpose_334_perm_0, x = k_195_cast_fp16)[name = string("transpose_486")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_333 = transpose(perm = transpose_333_perm_0, x = q_155_cast_fp16)[name = string("transpose_487")];
+            tensor<fp16, [1, 20, ?, ?]> qk_115_cast_fp16 = matmul(transpose_x = qk_115_transpose_x_0, transpose_y = qk_115_transpose_y_0, x = transpose_333, y = transpose_334)[name = string("qk_115_cast_fp16")];
+            int32 concat_433_values1_0 = const()[name = string("concat_433_values1_0"), val = int32(448)];
+            int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)];
+            bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_230_cast_uint16_to_int32, concat_433_values1_0))[name = string("concat_433")];
+            tensor<int32, [2]> var_4276_begin_0 = const()[name = string("op_4276_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4276_end_mask_0 = const()[name = string("op_4276_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4276_cast_fp16 = slice_by_index(begin = var_4276_begin_0, end = concat_433, end_mask = var_4276_end_mask_0, x = mask_to_fp16)[name = string("op_4276_cast_fp16")];
+            int32 concat_434_values0_0 = const()[name = string("concat_434_values0_0"), val = int32(0)];
+            int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)];
+            bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (concat_434_values0_0, gather_230_cast_uint16_to_int32))[name = string("concat_434")];
+            tensor<int32, [2]> var_4277_begin_0 = const()[name = string("op_4277_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4277_end_mask_0 = const()[name = string("op_4277_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4277_cast_fp16 = slice_by_index(begin = var_4277_begin_0, end = concat_434, end_mask = var_4277_end_mask_0, x = var_4276_cast_fp16)[name = string("op_4277_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_117_cast_fp16 = add(x = qk_115_cast_fp16, y = var_4277_cast_fp16)[name = string("qk_117_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4280_cast_fp16 = softmax(axis = var_4189, x = qk_117_cast_fp16)[name = string("op_4280_cast_fp16")];
+            bool var_4282_transpose_x_0 = const()[name = string("op_4282_transpose_x_0"), val = bool(false)];
+            bool var_4282_transpose_y_0 = const()[name = string("op_4282_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_195_cast_fp16 = transpose(perm = var_4273, x = var_4272_cast_fp16)[name = string("transpose_488")];
+            tensor<fp16, [1, 20, ?, 64]> var_4282_cast_fp16 = matmul(transpose_x = var_4282_transpose_x_0, transpose_y = var_4282_transpose_y_0, x = var_4280_cast_fp16, y = v_195_cast_fp16)[name = string("op_4282_cast_fp16")];
+            tensor<int32, [4]> var_4283 = const()[name = string("op_4283"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_435x = const()[name = string("concat_435x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4284_cast_fp16 = transpose(perm = var_4283, x = var_4282_cast_fp16)[name = string("transpose_485")];
+            tensor<fp16, [1, ?, 1280]> x_349_cast_fp16 = reshape(shape = concat_435x, x = var_4284_cast_fp16)[name = string("x_349_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4288_to_fp16 = const()[name = string("op_4288_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020441856)))];
+            tensor<fp16, [1280]> var_4289_to_fp16 = const()[name = string("op_4289_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023718720)))];
+            tensor<fp16, [1, ?, 1280]> linear_155_cast_fp16 = linear(bias = var_4289_to_fp16, weight = var_4288_to_fp16, x = x_349_cast_fp16)[name = string("linear_155_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_351_cast_fp16 = add(x = x_345_cast_fp16, y = linear_155_cast_fp16)[name = string("x_351_cast_fp16")];
+            tensor<int32, [1]> var_4296_axes_0 = const()[name = string("op_4296_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023721344)))];
+            tensor<fp16, [1280]> blocks_19_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023723968)))];
+            tensor<fp16, [1, ?, 1280]> var_4296_cast_fp16 = layer_norm(axes = var_4296_axes_0, beta = blocks_19_cross_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_cross_attn_ln_weight_to_fp16, x = x_351_cast_fp16)[name = string("op_4296_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4305_to_fp16 = const()[name = string("op_4305_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023726592)))];
+            tensor<fp16, [1280]> var_4306_to_fp16 = const()[name = string("op_4306_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027003456)))];
+            tensor<fp16, [1, ?, 1280]> linear_156_cast_fp16 = linear(bias = var_4306_to_fp16, weight = var_4305_to_fp16, x = var_4296_cast_fp16)[name = string("linear_156_cast_fp16")];
+            tensor<int32, [3]> concat_436 = const()[name = string("concat_436"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_437 = const()[name = string("concat_437"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_197_internal_tensor_assign_1_stride_0 = const()[name = string("k_197_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_436, begin_mask = k_197_internal_tensor_assign_1_begin_mask_0, end = concat_437, end_mask = k_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_197_internal_tensor_assign_1_squeeze_mask_0, stride = k_197_internal_tensor_assign_1_stride_0, update = k_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("k_197_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_438 = const()[name = string("concat_438"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_439 = const()[name = string("concat_439"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_197_internal_tensor_assign_1_stride_0 = const()[name = string("v_197_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_438, begin_mask = v_197_internal_tensor_assign_1_begin_mask_0, end = concat_439, end_mask = v_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_197_internal_tensor_assign_1_squeeze_mask_0, stride = v_197_internal_tensor_assign_1_stride_0, update = v_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("v_197_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_440x = const()[name = string("concat_440x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4326_cast_fp16 = reshape(shape = concat_440x, x = linear_156_cast_fp16)[name = string("op_4326_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_238_to_fp16 = const()[name = string("const_238_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_159_cast_fp16 = mul(x = var_4326_cast_fp16, y = const_238_to_fp16)[name = string("q_159_cast_fp16")];
+            tensor<int32, [4]> var_4332 = const()[name = string("op_4332"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4333_cast_fp16 = reshape(shape = var_4332, x = k_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4333_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_239_to_fp16 = const()[name = string("const_239_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_199_cast_fp16 = mul(x = var_4333_cast_fp16, y = const_239_to_fp16)[name = string("k_199_cast_fp16")];
+            tensor<int32, [4]> var_4339 = const()[name = string("op_4339"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4340_cast_fp16 = reshape(shape = var_4339, x = v_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4340_cast_fp16")];
+            tensor<int32, [4]> var_4341 = const()[name = string("op_4341"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_119_transpose_x_0 = const()[name = string("qk_119_transpose_x_0"), val = bool(false)];
+            bool qk_119_transpose_y_0 = const()[name = string("qk_119_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_335_perm_0 = const()[name = string("transpose_335_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_336_perm_0 = const()[name = string("transpose_336_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_336 = transpose(perm = transpose_336_perm_0, x = k_199_cast_fp16)[name = string("transpose_482")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_335 = transpose(perm = transpose_335_perm_0, x = q_159_cast_fp16)[name = string("transpose_483")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_119_cast_fp16 = matmul(transpose_x = qk_119_transpose_x_0, transpose_y = qk_119_transpose_y_0, x = transpose_335, y = transpose_336)[name = string("qk_119_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4345_cast_fp16 = softmax(axis = var_4189, x = qk_119_cast_fp16)[name = string("op_4345_cast_fp16")];
+            bool var_4347_transpose_x_0 = const()[name = string("op_4347_transpose_x_0"), val = bool(false)];
+            bool var_4347_transpose_y_0 = const()[name = string("op_4347_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_199_cast_fp16 = transpose(perm = var_4341, x = var_4340_cast_fp16)[name = string("transpose_484")];
+            tensor<fp16, [1, 20, ?, 64]> var_4347_cast_fp16 = matmul(transpose_x = var_4347_transpose_x_0, transpose_y = var_4347_transpose_y_0, x = var_4345_cast_fp16, y = v_199_cast_fp16)[name = string("op_4347_cast_fp16")];
+            tensor<int32, [4]> var_4348 = const()[name = string("op_4348"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_441x = const()[name = string("concat_441x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4349_cast_fp16 = transpose(perm = var_4348, x = var_4347_cast_fp16)[name = string("transpose_481")];
+            tensor<fp16, [1, ?, 1280]> x_355_cast_fp16 = reshape(shape = concat_441x, x = var_4349_cast_fp16)[name = string("x_355_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027006080)))];
+            tensor<fp16, [1280]> var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030282944)))];
+            tensor<fp16, [1, ?, 1280]> linear_157_cast_fp16 = linear(bias = var_4354_to_fp16, weight = var_4353_to_fp16, x = x_355_cast_fp16)[name = string("linear_157_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_357_cast_fp16 = add(x = x_351_cast_fp16, y = linear_157_cast_fp16)[name = string("x_357_cast_fp16")];
+            tensor<int32, [1]> var_4361_axes_0 = const()[name = string("op_4361_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030285568)))];
+            tensor<fp16, [1280]> blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030288192)))];
+            tensor<fp16, [1, ?, 1280]> var_4361_cast_fp16 = layer_norm(axes = var_4361_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_357_cast_fp16)[name = string("op_4361_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_4370_to_fp16 = const()[name = string("op_4370_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030290816)))];
+            tensor<fp16, [5120]> var_4371_to_fp16 = const()[name = string("op_4371_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043398080)))];
+            tensor<fp16, [1, ?, 5120]> linear_158_cast_fp16 = linear(bias = var_4371_to_fp16, weight = var_4370_to_fp16, x = var_4361_cast_fp16)[name = string("linear_158_cast_fp16")];
+            string x_361_mode_0 = const()[name = string("x_361_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_361_cast_fp16 = gelu(mode = x_361_mode_0, x = linear_158_cast_fp16)[name = string("x_361_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_4376_to_fp16 = const()[name = string("op_4376_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043408384)))];
+            tensor<fp16, [1280]> var_4377_to_fp16 = const()[name = string("op_4377_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056515648)))];
+            tensor<fp16, [1, ?, 1280]> linear_159_cast_fp16 = linear(bias = var_4377_to_fp16, weight = var_4376_to_fp16, x = x_361_cast_fp16)[name = string("linear_159_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_159_cast_fp16)[name = string("x_363_cast_fp16")];
+            tensor<int32, [4]> k_cache_81_begin_0 = const()[name = string("k_cache_81_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_81_end_0 = const()[name = string("k_cache_81_end_0"), val = tensor<int32, [4]>([21, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_81_end_mask_0 = const()[name = string("k_cache_81_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_81_squeeze_mask_0 = const()[name = string("k_cache_81_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_81_cast_fp16 = slice_by_index(begin = k_cache_81_begin_0, end = k_cache_81_end_0, end_mask = k_cache_81_end_mask_0, squeeze_mask = k_cache_81_squeeze_mask_0, x = coreml_update_state_102)[name = string("k_cache_81_cast_fp16")];
+            tensor<int32, [4]> v_cache_81_begin_0 = const()[name = string("v_cache_81_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_81_end_0 = const()[name = string("v_cache_81_end_0"), val = tensor<int32, [4]>([21, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_81_end_mask_0 = const()[name = string("v_cache_81_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_81_squeeze_mask_0 = const()[name = string("v_cache_81_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_81_cast_fp16 = slice_by_index(begin = v_cache_81_begin_0, end = v_cache_81_end_0, end_mask = v_cache_81_end_mask_0, squeeze_mask = v_cache_81_squeeze_mask_0, x = coreml_update_state_103)[name = string("v_cache_81_cast_fp16")];
+            tensor<int32, [4]> k_cache_83_begin_0 = const()[name = string("k_cache_83_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_83_end_0 = const()[name = string("k_cache_83_end_0"), val = tensor<int32, [4]>([21, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_83_end_mask_0 = const()[name = string("k_cache_83_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_83_squeeze_mask_0 = const()[name = string("k_cache_83_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_83_cast_fp16 = slice_by_index(begin = k_cache_83_begin_0, end = k_cache_83_end_0, end_mask = k_cache_83_end_mask_0, squeeze_mask = k_cache_83_squeeze_mask_0, x = read_state_2)[name = string("k_cache_83_cast_fp16")];
+            tensor<int32, [4]> v_cache_83_begin_0 = const()[name = string("v_cache_83_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_83_end_0 = const()[name = string("v_cache_83_end_0"), val = tensor<int32, [4]>([21, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_83_end_mask_0 = const()[name = string("v_cache_83_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_83_squeeze_mask_0 = const()[name = string("v_cache_83_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_83_cast_fp16 = slice_by_index(begin = v_cache_83_begin_0, end = v_cache_83_end_0, end_mask = v_cache_83_end_mask_0, squeeze_mask = v_cache_83_squeeze_mask_0, x = read_state_3)[name = string("v_cache_83_cast_fp16")];
+            int32 var_4400 = const()[name = string("op_4400"), val = int32(-1)];
+            tensor<int32, [1]> var_4418_axes_0 = const()[name = string("op_4418_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056518272)))];
+            tensor<fp16, [1280]> blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056520896)))];
+            fp16 var_4406_to_fp16 = const()[name = string("op_4406_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_4418_cast_fp16 = layer_norm(axes = var_4418_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_363_cast_fp16)[name = string("op_4418_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4429_to_fp16 = const()[name = string("op_4429_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056523520)))];
+            tensor<fp16, [1280]> var_4430_to_fp16 = const()[name = string("op_4430_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059800384)))];
+            tensor<fp16, [1, ?, 1280]> linear_160_cast_fp16 = linear(bias = var_4430_to_fp16, weight = var_4429_to_fp16, x = var_4418_cast_fp16)[name = string("linear_160_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4433_to_fp16 = const()[name = string("op_4433_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059803008)))];
+            tensor<fp16, [1, ?, 1280]> linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4433_to_fp16, x = var_4418_cast_fp16)[name = string("linear_161_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4437_to_fp16 = const()[name = string("op_4437_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1063079872)))];
+            tensor<fp16, [1280]> var_4438_to_fp16 = const()[name = string("op_4438_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066356736)))];
+            tensor<fp16, [1, ?, 1280]> linear_162_cast_fp16 = linear(bias = var_4438_to_fp16, weight = var_4437_to_fp16, x = var_4418_cast_fp16)[name = string("linear_162_cast_fp16")];
+            tensor<int32, [3]> var_4440_shape_cast_fp16 = shape(x = linear_160_cast_fp16)[name = string("op_4440_shape_cast_fp16")];
+            int32 gather_242_axis_0 = const()[name = string("gather_242_axis_0"), val = int32(0)];
+            int32 gather_242_batch_dims_0 = const()[name = string("gather_242_batch_dims_0"), val = int32(0)];
+            bool gather_242_validate_indices_0 = const()[name = string("gather_242_validate_indices_0"), val = bool(false)];
+            string var_4440_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4440_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_242_to_uint16 = const()[name = string("select_242_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4440_shape_cast_fp16_to_uint16 = cast(dtype = var_4440_shape_cast_fp16_to_uint16_dtype_0, x = var_4440_shape_cast_fp16)[name = string("cast_350")];
+            uint16 gather_242_cast_uint16 = gather(axis = gather_242_axis_0, batch_dims = gather_242_batch_dims_0, indices = select_242_to_uint16, validate_indices = gather_242_validate_indices_0, x = var_4440_shape_cast_fp16_to_uint16)[name = string("gather_242_cast_uint16")];
+            string gather_242_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_242_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_242_cast_uint16_to_int32 = cast(dtype = gather_242_cast_uint16_to_int32_dtype_0, x = gather_242_cast_uint16)[name = string("cast_349")];
+            int32 end_step_43 = add(x = offset, y = gather_242_cast_uint16_to_int32)[name = string("end_step_43")];
+            tensor<int32, [1]> expand_dims_320 = const()[name = string("expand_dims_320"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_323_axes_0 = const()[name = string("expand_dims_323_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_323 = expand_dims(axes = expand_dims_323_axes_0, x = end_step_43)[name = string("expand_dims_323")];
+            tensor<int32, [1]> concat_444_values0_0 = const()[name = string("concat_444_values0_0"), val = tensor<int32, [1]>([20])];
+            int32 concat_444_axis_0 = const()[name = string("concat_444_axis_0"), val = int32(0)];
+            bool concat_444_interleave_0 = const()[name = string("concat_444_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_444 = concat(axis = concat_444_axis_0, interleave = concat_444_interleave_0, values = (concat_444_values0_0, expand_dims_320, expand_dims_1, expand_dims_322))[name = string("concat_444")];
+            tensor<int32, [1]> concat_445_values0_0 = const()[name = string("concat_445_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_445_values1_0 = const()[name = string("concat_445_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_445_values3_0 = const()[name = string("concat_445_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_445_axis_0 = const()[name = string("concat_445_axis_0"), val = int32(0)];
+            bool concat_445_interleave_0 = const()[name = string("concat_445_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_445 = concat(axis = concat_445_axis_0, interleave = concat_445_interleave_0, values = (concat_445_values0_0, concat_445_values1_0, expand_dims_323, concat_445_values3_0))[name = string("concat_445")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = k_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = k_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_21_stride_0, update = linear_161_cast_fp16, x = coreml_update_state_102)[name = string("k_cache1_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_21_cast_fp16, input = k_cache1)[name = string("coreml_update_state_104_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_104 = read_state(input = k_cache1)[name = string("coreml_update_state_104")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = v_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = v_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_21_stride_0, update = linear_162_cast_fp16, x = coreml_update_state_103)[name = string("v_cache1_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_21_cast_fp16, input = v_cache1)[name = string("coreml_update_state_105_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_105 = read_state(input = v_cache1)[name = string("coreml_update_state_105")];
+            int32 concat_450_values0_0 = const()[name = string("concat_450_values0_0"), val = int32(1)];
+            int32 concat_450_values2_0 = const()[name = string("concat_450_values2_0"), val = int32(1280)];
+            int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)];
+            bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (concat_450_values0_0, end_step_43, concat_450_values2_0))[name = string("concat_450")];
+            tensor<int32, [3]> var_4456_begin_0 = const()[name = string("op_4456_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4456_end_mask_0 = const()[name = string("op_4456_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4456_cast_fp16 = slice_by_index(begin = var_4456_begin_0, end = concat_450, end_mask = var_4456_end_mask_0, x = k_cache_81_cast_fp16)[name = string("op_4456_cast_fp16")];
+            tensor<int32, [3]> var_4459_begin_0 = const()[name = string("op_4459_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4459_end_mask_0 = const()[name = string("op_4459_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4459_cast_fp16 = slice_by_index(begin = var_4459_begin_0, end = concat_450, end_mask = var_4459_end_mask_0, x = v_cache_81_cast_fp16)[name = string("op_4459_cast_fp16")];
+            tensor<int32, [4]> concat_452x = const()[name = string("concat_452x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4469_cast_fp16 = reshape(shape = concat_452x, x = linear_160_cast_fp16)[name = string("op_4469_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_163_cast_fp16 = mul(x = var_4469_cast_fp16, y = const_240_to_fp16)[name = string("q_163_cast_fp16")];
+            tensor<int32, [4]> concat_453x = const()[name = string("concat_453x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4476_cast_fp16 = reshape(shape = concat_453x, x = var_4456_cast_fp16)[name = string("op_4476_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_205_cast_fp16 = mul(x = var_4476_cast_fp16, y = const_241_to_fp16)[name = string("k_205_cast_fp16")];
+            tensor<int32, [4]> concat_454x = const()[name = string("concat_454x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4483_cast_fp16 = reshape(shape = concat_454x, x = var_4459_cast_fp16)[name = string("op_4483_cast_fp16")];
+            tensor<int32, [4]> var_4484 = const()[name = string("op_4484"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_121_transpose_x_0 = const()[name = string("qk_121_transpose_x_0"), val = bool(false)];
+            bool qk_121_transpose_y_0 = const()[name = string("qk_121_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_337_perm_0 = const()[name = string("transpose_337_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_338_perm_0 = const()[name = string("transpose_338_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_338 = transpose(perm = transpose_338_perm_0, x = k_205_cast_fp16)[name = string("transpose_478")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_337 = transpose(perm = transpose_337_perm_0, x = q_163_cast_fp16)[name = string("transpose_479")];
+            tensor<fp16, [1, 20, ?, ?]> qk_121_cast_fp16 = matmul(transpose_x = qk_121_transpose_x_0, transpose_y = qk_121_transpose_y_0, x = transpose_337, y = transpose_338)[name = string("qk_121_cast_fp16")];
+            int32 concat_455_values1_0 = const()[name = string("concat_455_values1_0"), val = int32(448)];
+            int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)];
+            bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_242_cast_uint16_to_int32, concat_455_values1_0))[name = string("concat_455")];
+            tensor<int32, [2]> var_4487_begin_0 = const()[name = string("op_4487_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4487_end_mask_0 = const()[name = string("op_4487_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4487_cast_fp16 = slice_by_index(begin = var_4487_begin_0, end = concat_455, end_mask = var_4487_end_mask_0, x = mask_to_fp16)[name = string("op_4487_cast_fp16")];
+            int32 concat_456_values0_0 = const()[name = string("concat_456_values0_0"), val = int32(0)];
+            int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)];
+            bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (concat_456_values0_0, gather_242_cast_uint16_to_int32))[name = string("concat_456")];
+            tensor<int32, [2]> var_4488_begin_0 = const()[name = string("op_4488_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4488_end_mask_0 = const()[name = string("op_4488_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4488_cast_fp16 = slice_by_index(begin = var_4488_begin_0, end = concat_456, end_mask = var_4488_end_mask_0, x = var_4487_cast_fp16)[name = string("op_4488_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_123_cast_fp16 = add(x = qk_121_cast_fp16, y = var_4488_cast_fp16)[name = string("qk_123_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4491_cast_fp16 = softmax(axis = var_4400, x = qk_123_cast_fp16)[name = string("op_4491_cast_fp16")];
+            bool var_4493_transpose_x_0 = const()[name = string("op_4493_transpose_x_0"), val = bool(false)];
+            bool var_4493_transpose_y_0 = const()[name = string("op_4493_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_205_cast_fp16 = transpose(perm = var_4484, x = var_4483_cast_fp16)[name = string("transpose_480")];
+            tensor<fp16, [1, 20, ?, 64]> var_4493_cast_fp16 = matmul(transpose_x = var_4493_transpose_x_0, transpose_y = var_4493_transpose_y_0, x = var_4491_cast_fp16, y = v_205_cast_fp16)[name = string("op_4493_cast_fp16")];
+            tensor<int32, [4]> var_4494 = const()[name = string("op_4494"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_457x = const()[name = string("concat_457x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4495_cast_fp16 = transpose(perm = var_4494, x = var_4493_cast_fp16)[name = string("transpose_477")];
+            tensor<fp16, [1, ?, 1280]> x_367_cast_fp16 = reshape(shape = concat_457x, x = var_4495_cast_fp16)[name = string("x_367_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4499_to_fp16 = const()[name = string("op_4499_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066359360)))];
+            tensor<fp16, [1280]> var_4500_to_fp16 = const()[name = string("op_4500_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069636224)))];
+            tensor<fp16, [1, ?, 1280]> linear_163_cast_fp16 = linear(bias = var_4500_to_fp16, weight = var_4499_to_fp16, x = x_367_cast_fp16)[name = string("linear_163_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_369_cast_fp16 = add(x = x_363_cast_fp16, y = linear_163_cast_fp16)[name = string("x_369_cast_fp16")];
+            tensor<int32, [1]> var_4507_axes_0 = const()[name = string("op_4507_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069638848)))];
+            tensor<fp16, [1280]> blocks_20_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069641472)))];
+            tensor<fp16, [1, ?, 1280]> var_4507_cast_fp16 = layer_norm(axes = var_4507_axes_0, beta = blocks_20_cross_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_cross_attn_ln_weight_to_fp16, x = x_369_cast_fp16)[name = string("op_4507_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4516_to_fp16 = const()[name = string("op_4516_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069644096)))];
+            tensor<fp16, [1280]> var_4517_to_fp16 = const()[name = string("op_4517_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072920960)))];
+            tensor<fp16, [1, ?, 1280]> linear_164_cast_fp16 = linear(bias = var_4517_to_fp16, weight = var_4516_to_fp16, x = var_4507_cast_fp16)[name = string("linear_164_cast_fp16")];
+            tensor<int32, [3]> concat_458 = const()[name = string("concat_458"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_459 = const()[name = string("concat_459"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_207_internal_tensor_assign_1_stride_0 = const()[name = string("k_207_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_458, begin_mask = k_207_internal_tensor_assign_1_begin_mask_0, end = concat_459, end_mask = k_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_207_internal_tensor_assign_1_squeeze_mask_0, stride = k_207_internal_tensor_assign_1_stride_0, update = k_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("k_207_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_460 = const()[name = string("concat_460"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_461 = const()[name = string("concat_461"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_207_internal_tensor_assign_1_stride_0 = const()[name = string("v_207_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_460, begin_mask = v_207_internal_tensor_assign_1_begin_mask_0, end = concat_461, end_mask = v_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_207_internal_tensor_assign_1_squeeze_mask_0, stride = v_207_internal_tensor_assign_1_stride_0, update = v_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("v_207_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_462x = const()[name = string("concat_462x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4537_cast_fp16 = reshape(shape = concat_462x, x = linear_164_cast_fp16)[name = string("op_4537_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_167_cast_fp16 = mul(x = var_4537_cast_fp16, y = const_242_to_fp16)[name = string("q_167_cast_fp16")];
+            tensor<int32, [4]> var_4543 = const()[name = string("op_4543"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4544_cast_fp16 = reshape(shape = var_4543, x = k_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4544_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_209_cast_fp16 = mul(x = var_4544_cast_fp16, y = const_243_to_fp16)[name = string("k_209_cast_fp16")];
+            tensor<int32, [4]> var_4550 = const()[name = string("op_4550"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4551_cast_fp16 = reshape(shape = var_4550, x = v_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4551_cast_fp16")];
+            tensor<int32, [4]> var_4552 = const()[name = string("op_4552"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_125_transpose_x_0 = const()[name = string("qk_125_transpose_x_0"), val = bool(false)];
+            bool qk_125_transpose_y_0 = const()[name = string("qk_125_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_339_perm_0 = const()[name = string("transpose_339_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_340_perm_0 = const()[name = string("transpose_340_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_340 = transpose(perm = transpose_340_perm_0, x = k_209_cast_fp16)[name = string("transpose_474")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_339 = transpose(perm = transpose_339_perm_0, x = q_167_cast_fp16)[name = string("transpose_475")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_125_cast_fp16 = matmul(transpose_x = qk_125_transpose_x_0, transpose_y = qk_125_transpose_y_0, x = transpose_339, y = transpose_340)[name = string("qk_125_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4556_cast_fp16 = softmax(axis = var_4400, x = qk_125_cast_fp16)[name = string("op_4556_cast_fp16")];
+            bool var_4558_transpose_x_0 = const()[name = string("op_4558_transpose_x_0"), val = bool(false)];
+            bool var_4558_transpose_y_0 = const()[name = string("op_4558_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_209_cast_fp16 = transpose(perm = var_4552, x = var_4551_cast_fp16)[name = string("transpose_476")];
+            tensor<fp16, [1, 20, ?, 64]> var_4558_cast_fp16 = matmul(transpose_x = var_4558_transpose_x_0, transpose_y = var_4558_transpose_y_0, x = var_4556_cast_fp16, y = v_209_cast_fp16)[name = string("op_4558_cast_fp16")];
+            tensor<int32, [4]> var_4559 = const()[name = string("op_4559"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_463x = const()[name = string("concat_463x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4560_cast_fp16 = transpose(perm = var_4559, x = var_4558_cast_fp16)[name = string("transpose_473")];
+            tensor<fp16, [1, ?, 1280]> x_373_cast_fp16 = reshape(shape = concat_463x, x = var_4560_cast_fp16)[name = string("x_373_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4564_to_fp16 = const()[name = string("op_4564_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072923584)))];
+            tensor<fp16, [1280]> var_4565_to_fp16 = const()[name = string("op_4565_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076200448)))];
+            tensor<fp16, [1, ?, 1280]> linear_165_cast_fp16 = linear(bias = var_4565_to_fp16, weight = var_4564_to_fp16, x = x_373_cast_fp16)[name = string("linear_165_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_375_cast_fp16 = add(x = x_369_cast_fp16, y = linear_165_cast_fp16)[name = string("x_375_cast_fp16")];
+            tensor<int32, [1]> var_4572_axes_0 = const()[name = string("op_4572_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076203072)))];
+            tensor<fp16, [1280]> blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076205696)))];
+            tensor<fp16, [1, ?, 1280]> var_4572_cast_fp16 = layer_norm(axes = var_4572_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_375_cast_fp16)[name = string("op_4572_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_4581_to_fp16 = const()[name = string("op_4581_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076208320)))];
+            tensor<fp16, [5120]> var_4582_to_fp16 = const()[name = string("op_4582_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089315584)))];
+            tensor<fp16, [1, ?, 5120]> linear_166_cast_fp16 = linear(bias = var_4582_to_fp16, weight = var_4581_to_fp16, x = var_4572_cast_fp16)[name = string("linear_166_cast_fp16")];
+            string x_379_mode_0 = const()[name = string("x_379_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_379_cast_fp16 = gelu(mode = x_379_mode_0, x = linear_166_cast_fp16)[name = string("x_379_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_4587_to_fp16 = const()[name = string("op_4587_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089325888)))];
+            tensor<fp16, [1280]> var_4588_to_fp16 = const()[name = string("op_4588_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102433152)))];
+            tensor<fp16, [1, ?, 1280]> linear_167_cast_fp16 = linear(bias = var_4588_to_fp16, weight = var_4587_to_fp16, x = x_379_cast_fp16)[name = string("linear_167_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_381_cast_fp16 = add(x = x_375_cast_fp16, y = linear_167_cast_fp16)[name = string("x_381_cast_fp16")];
+            tensor<int32, [4]> k_cache_85_begin_0 = const()[name = string("k_cache_85_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_85_end_0 = const()[name = string("k_cache_85_end_0"), val = tensor<int32, [4]>([22, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_85_end_mask_0 = const()[name = string("k_cache_85_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_85_squeeze_mask_0 = const()[name = string("k_cache_85_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_85_cast_fp16 = slice_by_index(begin = k_cache_85_begin_0, end = k_cache_85_end_0, end_mask = k_cache_85_end_mask_0, squeeze_mask = k_cache_85_squeeze_mask_0, x = coreml_update_state_104)[name = string("k_cache_85_cast_fp16")];
+            tensor<int32, [4]> v_cache_85_begin_0 = const()[name = string("v_cache_85_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_85_end_0 = const()[name = string("v_cache_85_end_0"), val = tensor<int32, [4]>([22, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_85_end_mask_0 = const()[name = string("v_cache_85_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_85_squeeze_mask_0 = const()[name = string("v_cache_85_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_85_cast_fp16 = slice_by_index(begin = v_cache_85_begin_0, end = v_cache_85_end_0, end_mask = v_cache_85_end_mask_0, squeeze_mask = v_cache_85_squeeze_mask_0, x = coreml_update_state_105)[name = string("v_cache_85_cast_fp16")];
+            tensor<int32, [4]> k_cache_87_begin_0 = const()[name = string("k_cache_87_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_87_end_0 = const()[name = string("k_cache_87_end_0"), val = tensor<int32, [4]>([22, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_87_end_mask_0 = const()[name = string("k_cache_87_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_87_squeeze_mask_0 = const()[name = string("k_cache_87_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_87_cast_fp16 = slice_by_index(begin = k_cache_87_begin_0, end = k_cache_87_end_0, end_mask = k_cache_87_end_mask_0, squeeze_mask = k_cache_87_squeeze_mask_0, x = read_state_2)[name = string("k_cache_87_cast_fp16")];
+            tensor<int32, [4]> v_cache_87_begin_0 = const()[name = string("v_cache_87_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_87_end_0 = const()[name = string("v_cache_87_end_0"), val = tensor<int32, [4]>([22, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_87_end_mask_0 = const()[name = string("v_cache_87_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_87_squeeze_mask_0 = const()[name = string("v_cache_87_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_87_cast_fp16 = slice_by_index(begin = v_cache_87_begin_0, end = v_cache_87_end_0, end_mask = v_cache_87_end_mask_0, squeeze_mask = v_cache_87_squeeze_mask_0, x = read_state_3)[name = string("v_cache_87_cast_fp16")];
+            int32 var_4611 = const()[name = string("op_4611"), val = int32(-1)];
+            tensor<int32, [1]> var_4629_axes_0 = const()[name = string("op_4629_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102435776)))];
+            tensor<fp16, [1280]> blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102438400)))];
+            fp16 var_4617_to_fp16 = const()[name = string("op_4617_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_4629_cast_fp16 = layer_norm(axes = var_4629_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_381_cast_fp16)[name = string("op_4629_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4640_to_fp16 = const()[name = string("op_4640_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102441024)))];
+            tensor<fp16, [1280]> var_4641_to_fp16 = const()[name = string("op_4641_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105717888)))];
+            tensor<fp16, [1, ?, 1280]> linear_168_cast_fp16 = linear(bias = var_4641_to_fp16, weight = var_4640_to_fp16, x = var_4629_cast_fp16)[name = string("linear_168_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4644_to_fp16 = const()[name = string("op_4644_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105720512)))];
+            tensor<fp16, [1, ?, 1280]> linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4644_to_fp16, x = var_4629_cast_fp16)[name = string("linear_169_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4648_to_fp16 = const()[name = string("op_4648_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1108997376)))];
+            tensor<fp16, [1280]> var_4649_to_fp16 = const()[name = string("op_4649_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112274240)))];
+            tensor<fp16, [1, ?, 1280]> linear_170_cast_fp16 = linear(bias = var_4649_to_fp16, weight = var_4648_to_fp16, x = var_4629_cast_fp16)[name = string("linear_170_cast_fp16")];
+            tensor<int32, [3]> var_4651_shape_cast_fp16 = shape(x = linear_168_cast_fp16)[name = string("op_4651_shape_cast_fp16")];
+            int32 gather_254_axis_0 = const()[name = string("gather_254_axis_0"), val = int32(0)];
+            int32 gather_254_batch_dims_0 = const()[name = string("gather_254_batch_dims_0"), val = int32(0)];
+            bool gather_254_validate_indices_0 = const()[name = string("gather_254_validate_indices_0"), val = bool(false)];
+            string var_4651_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4651_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_254_to_uint16 = const()[name = string("select_254_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4651_shape_cast_fp16_to_uint16 = cast(dtype = var_4651_shape_cast_fp16_to_uint16_dtype_0, x = var_4651_shape_cast_fp16)[name = string("cast_348")];
+            uint16 gather_254_cast_uint16 = gather(axis = gather_254_axis_0, batch_dims = gather_254_batch_dims_0, indices = select_254_to_uint16, validate_indices = gather_254_validate_indices_0, x = var_4651_shape_cast_fp16_to_uint16)[name = string("gather_254_cast_uint16")];
+            string gather_254_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_254_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_254_cast_uint16_to_int32 = cast(dtype = gather_254_cast_uint16_to_int32_dtype_0, x = gather_254_cast_uint16)[name = string("cast_347")];
+            int32 end_step_45 = add(x = offset, y = gather_254_cast_uint16_to_int32)[name = string("end_step_45")];
+            tensor<int32, [1]> expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_338 = const()[name = string("expand_dims_338"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_339_axes_0 = const()[name = string("expand_dims_339_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_339 = expand_dims(axes = expand_dims_339_axes_0, x = end_step_45)[name = string("expand_dims_339")];
+            tensor<int32, [1]> concat_466_values0_0 = const()[name = string("concat_466_values0_0"), val = tensor<int32, [1]>([21])];
+            int32 concat_466_axis_0 = const()[name = string("concat_466_axis_0"), val = int32(0)];
+            bool concat_466_interleave_0 = const()[name = string("concat_466_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_466 = concat(axis = concat_466_axis_0, interleave = concat_466_interleave_0, values = (concat_466_values0_0, expand_dims_336, expand_dims_1, expand_dims_338))[name = string("concat_466")];
+            tensor<int32, [1]> concat_467_values0_0 = const()[name = string("concat_467_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_467_values1_0 = const()[name = string("concat_467_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_467_values3_0 = const()[name = string("concat_467_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_467_axis_0 = const()[name = string("concat_467_axis_0"), val = int32(0)];
+            bool concat_467_interleave_0 = const()[name = string("concat_467_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_467 = concat(axis = concat_467_axis_0, interleave = concat_467_interleave_0, values = (concat_467_values0_0, concat_467_values1_0, expand_dims_339, concat_467_values3_0))[name = string("concat_467")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = k_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = k_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_22_stride_0, update = linear_169_cast_fp16, x = coreml_update_state_104)[name = string("k_cache1_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_22_cast_fp16, input = k_cache1)[name = string("coreml_update_state_106_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_106 = read_state(input = k_cache1)[name = string("coreml_update_state_106")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = v_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = v_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_22_stride_0, update = linear_170_cast_fp16, x = coreml_update_state_105)[name = string("v_cache1_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_22_cast_fp16, input = v_cache1)[name = string("coreml_update_state_107_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_107 = read_state(input = v_cache1)[name = string("coreml_update_state_107")];
+            int32 concat_472_values0_0 = const()[name = string("concat_472_values0_0"), val = int32(1)];
+            int32 concat_472_values2_0 = const()[name = string("concat_472_values2_0"), val = int32(1280)];
+            int32 concat_472_axis_0 = const()[name = string("concat_472_axis_0"), val = int32(0)];
+            bool concat_472_interleave_0 = const()[name = string("concat_472_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_472 = concat(axis = concat_472_axis_0, interleave = concat_472_interleave_0, values = (concat_472_values0_0, end_step_45, concat_472_values2_0))[name = string("concat_472")];
+            tensor<int32, [3]> var_4667_begin_0 = const()[name = string("op_4667_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4667_end_mask_0 = const()[name = string("op_4667_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4667_cast_fp16 = slice_by_index(begin = var_4667_begin_0, end = concat_472, end_mask = var_4667_end_mask_0, x = k_cache_85_cast_fp16)[name = string("op_4667_cast_fp16")];
+            tensor<int32, [3]> var_4670_begin_0 = const()[name = string("op_4670_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4670_end_mask_0 = const()[name = string("op_4670_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4670_cast_fp16 = slice_by_index(begin = var_4670_begin_0, end = concat_472, end_mask = var_4670_end_mask_0, x = v_cache_85_cast_fp16)[name = string("op_4670_cast_fp16")];
+            tensor<int32, [4]> concat_474x = const()[name = string("concat_474x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4680_cast_fp16 = reshape(shape = concat_474x, x = linear_168_cast_fp16)[name = string("op_4680_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_171_cast_fp16 = mul(x = var_4680_cast_fp16, y = const_244_to_fp16)[name = string("q_171_cast_fp16")];
+            tensor<int32, [4]> concat_475x = const()[name = string("concat_475x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4687_cast_fp16 = reshape(shape = concat_475x, x = var_4667_cast_fp16)[name = string("op_4687_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_215_cast_fp16 = mul(x = var_4687_cast_fp16, y = const_245_to_fp16)[name = string("k_215_cast_fp16")];
+            tensor<int32, [4]> concat_476x = const()[name = string("concat_476x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4694_cast_fp16 = reshape(shape = concat_476x, x = var_4670_cast_fp16)[name = string("op_4694_cast_fp16")];
+            tensor<int32, [4]> var_4695 = const()[name = string("op_4695"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_127_transpose_x_0 = const()[name = string("qk_127_transpose_x_0"), val = bool(false)];
+            bool qk_127_transpose_y_0 = const()[name = string("qk_127_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_341_perm_0 = const()[name = string("transpose_341_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_342_perm_0 = const()[name = string("transpose_342_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_342 = transpose(perm = transpose_342_perm_0, x = k_215_cast_fp16)[name = string("transpose_470")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_341 = transpose(perm = transpose_341_perm_0, x = q_171_cast_fp16)[name = string("transpose_471")];
+            tensor<fp16, [1, 20, ?, ?]> qk_127_cast_fp16 = matmul(transpose_x = qk_127_transpose_x_0, transpose_y = qk_127_transpose_y_0, x = transpose_341, y = transpose_342)[name = string("qk_127_cast_fp16")];
+            int32 concat_477_values1_0 = const()[name = string("concat_477_values1_0"), val = int32(448)];
+            int32 concat_477_axis_0 = const()[name = string("concat_477_axis_0"), val = int32(0)];
+            bool concat_477_interleave_0 = const()[name = string("concat_477_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_477 = concat(axis = concat_477_axis_0, interleave = concat_477_interleave_0, values = (gather_254_cast_uint16_to_int32, concat_477_values1_0))[name = string("concat_477")];
+            tensor<int32, [2]> var_4698_begin_0 = const()[name = string("op_4698_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4698_end_mask_0 = const()[name = string("op_4698_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4698_cast_fp16 = slice_by_index(begin = var_4698_begin_0, end = concat_477, end_mask = var_4698_end_mask_0, x = mask_to_fp16)[name = string("op_4698_cast_fp16")];
+            int32 concat_478_values0_0 = const()[name = string("concat_478_values0_0"), val = int32(0)];
+            int32 concat_478_axis_0 = const()[name = string("concat_478_axis_0"), val = int32(0)];
+            bool concat_478_interleave_0 = const()[name = string("concat_478_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_478 = concat(axis = concat_478_axis_0, interleave = concat_478_interleave_0, values = (concat_478_values0_0, gather_254_cast_uint16_to_int32))[name = string("concat_478")];
+            tensor<int32, [2]> var_4699_begin_0 = const()[name = string("op_4699_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4699_end_mask_0 = const()[name = string("op_4699_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4699_cast_fp16 = slice_by_index(begin = var_4699_begin_0, end = concat_478, end_mask = var_4699_end_mask_0, x = var_4698_cast_fp16)[name = string("op_4699_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_129_cast_fp16 = add(x = qk_127_cast_fp16, y = var_4699_cast_fp16)[name = string("qk_129_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4702_cast_fp16 = softmax(axis = var_4611, x = qk_129_cast_fp16)[name = string("op_4702_cast_fp16")];
+            bool var_4704_transpose_x_0 = const()[name = string("op_4704_transpose_x_0"), val = bool(false)];
+            bool var_4704_transpose_y_0 = const()[name = string("op_4704_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_215_cast_fp16 = transpose(perm = var_4695, x = var_4694_cast_fp16)[name = string("transpose_472")];
+            tensor<fp16, [1, 20, ?, 64]> var_4704_cast_fp16 = matmul(transpose_x = var_4704_transpose_x_0, transpose_y = var_4704_transpose_y_0, x = var_4702_cast_fp16, y = v_215_cast_fp16)[name = string("op_4704_cast_fp16")];
+            tensor<int32, [4]> var_4705 = const()[name = string("op_4705"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_479x = const()[name = string("concat_479x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4706_cast_fp16 = transpose(perm = var_4705, x = var_4704_cast_fp16)[name = string("transpose_469")];
+            tensor<fp16, [1, ?, 1280]> x_385_cast_fp16 = reshape(shape = concat_479x, x = var_4706_cast_fp16)[name = string("x_385_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4710_to_fp16 = const()[name = string("op_4710_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112276864)))];
+            tensor<fp16, [1280]> var_4711_to_fp16 = const()[name = string("op_4711_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115553728)))];
+            tensor<fp16, [1, ?, 1280]> linear_171_cast_fp16 = linear(bias = var_4711_to_fp16, weight = var_4710_to_fp16, x = x_385_cast_fp16)[name = string("linear_171_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_387_cast_fp16 = add(x = x_381_cast_fp16, y = linear_171_cast_fp16)[name = string("x_387_cast_fp16")];
+            tensor<int32, [1]> var_4718_axes_0 = const()[name = string("op_4718_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115556352)))];
+            tensor<fp16, [1280]> blocks_21_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115558976)))];
+            tensor<fp16, [1, ?, 1280]> var_4718_cast_fp16 = layer_norm(axes = var_4718_axes_0, beta = blocks_21_cross_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_cross_attn_ln_weight_to_fp16, x = x_387_cast_fp16)[name = string("op_4718_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4727_to_fp16 = const()[name = string("op_4727_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115561600)))];
+            tensor<fp16, [1280]> var_4728_to_fp16 = const()[name = string("op_4728_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118838464)))];
+            tensor<fp16, [1, ?, 1280]> linear_172_cast_fp16 = linear(bias = var_4728_to_fp16, weight = var_4727_to_fp16, x = var_4718_cast_fp16)[name = string("linear_172_cast_fp16")];
+            tensor<int32, [3]> concat_480 = const()[name = string("concat_480"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_481 = const()[name = string("concat_481"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_217_internal_tensor_assign_1_stride_0 = const()[name = string("k_217_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_480, begin_mask = k_217_internal_tensor_assign_1_begin_mask_0, end = concat_481, end_mask = k_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_217_internal_tensor_assign_1_squeeze_mask_0, stride = k_217_internal_tensor_assign_1_stride_0, update = k_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("k_217_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_482 = const()[name = string("concat_482"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_483 = const()[name = string("concat_483"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_217_internal_tensor_assign_1_stride_0 = const()[name = string("v_217_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_482, begin_mask = v_217_internal_tensor_assign_1_begin_mask_0, end = concat_483, end_mask = v_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_217_internal_tensor_assign_1_squeeze_mask_0, stride = v_217_internal_tensor_assign_1_stride_0, update = v_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("v_217_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_484x = const()[name = string("concat_484x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4748_cast_fp16 = reshape(shape = concat_484x, x = linear_172_cast_fp16)[name = string("op_4748_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_175_cast_fp16 = mul(x = var_4748_cast_fp16, y = const_246_to_fp16)[name = string("q_175_cast_fp16")];
+            tensor<int32, [4]> var_4754 = const()[name = string("op_4754"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4755_cast_fp16 = reshape(shape = var_4754, x = k_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4755_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_219_cast_fp16 = mul(x = var_4755_cast_fp16, y = const_247_to_fp16)[name = string("k_219_cast_fp16")];
+            tensor<int32, [4]> var_4761 = const()[name = string("op_4761"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4762_cast_fp16 = reshape(shape = var_4761, x = v_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4762_cast_fp16")];
+            tensor<int32, [4]> var_4763 = const()[name = string("op_4763"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_131_transpose_x_0 = const()[name = string("qk_131_transpose_x_0"), val = bool(false)];
+            bool qk_131_transpose_y_0 = const()[name = string("qk_131_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_343_perm_0 = const()[name = string("transpose_343_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_344_perm_0 = const()[name = string("transpose_344_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_344 = transpose(perm = transpose_344_perm_0, x = k_219_cast_fp16)[name = string("transpose_466")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_343 = transpose(perm = transpose_343_perm_0, x = q_175_cast_fp16)[name = string("transpose_467")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_131_cast_fp16 = matmul(transpose_x = qk_131_transpose_x_0, transpose_y = qk_131_transpose_y_0, x = transpose_343, y = transpose_344)[name = string("qk_131_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4767_cast_fp16 = softmax(axis = var_4611, x = qk_131_cast_fp16)[name = string("op_4767_cast_fp16")];
+            bool var_4769_transpose_x_0 = const()[name = string("op_4769_transpose_x_0"), val = bool(false)];
+            bool var_4769_transpose_y_0 = const()[name = string("op_4769_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_219_cast_fp16 = transpose(perm = var_4763, x = var_4762_cast_fp16)[name = string("transpose_468")];
+            tensor<fp16, [1, 20, ?, 64]> var_4769_cast_fp16 = matmul(transpose_x = var_4769_transpose_x_0, transpose_y = var_4769_transpose_y_0, x = var_4767_cast_fp16, y = v_219_cast_fp16)[name = string("op_4769_cast_fp16")];
+            tensor<int32, [4]> var_4770 = const()[name = string("op_4770"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_485x = const()[name = string("concat_485x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4771_cast_fp16 = transpose(perm = var_4770, x = var_4769_cast_fp16)[name = string("transpose_465")];
+            tensor<fp16, [1, ?, 1280]> x_391_cast_fp16 = reshape(shape = concat_485x, x = var_4771_cast_fp16)[name = string("x_391_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4775_to_fp16 = const()[name = string("op_4775_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118841088)))];
+            tensor<fp16, [1280]> var_4776_to_fp16 = const()[name = string("op_4776_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122117952)))];
+            tensor<fp16, [1, ?, 1280]> linear_173_cast_fp16 = linear(bias = var_4776_to_fp16, weight = var_4775_to_fp16, x = x_391_cast_fp16)[name = string("linear_173_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_393_cast_fp16 = add(x = x_387_cast_fp16, y = linear_173_cast_fp16)[name = string("x_393_cast_fp16")];
+            tensor<int32, [1]> var_4783_axes_0 = const()[name = string("op_4783_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122120576)))];
+            tensor<fp16, [1280]> blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122123200)))];
+            tensor<fp16, [1, ?, 1280]> var_4783_cast_fp16 = layer_norm(axes = var_4783_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_393_cast_fp16)[name = string("op_4783_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_4792_to_fp16 = const()[name = string("op_4792_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122125824)))];
+            tensor<fp16, [5120]> var_4793_to_fp16 = const()[name = string("op_4793_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135233088)))];
+            tensor<fp16, [1, ?, 5120]> linear_174_cast_fp16 = linear(bias = var_4793_to_fp16, weight = var_4792_to_fp16, x = var_4783_cast_fp16)[name = string("linear_174_cast_fp16")];
+            string x_397_mode_0 = const()[name = string("x_397_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_397_cast_fp16 = gelu(mode = x_397_mode_0, x = linear_174_cast_fp16)[name = string("x_397_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_4798_to_fp16 = const()[name = string("op_4798_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135243392)))];
+            tensor<fp16, [1280]> var_4799_to_fp16 = const()[name = string("op_4799_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148350656)))];
+            tensor<fp16, [1, ?, 1280]> linear_175_cast_fp16 = linear(bias = var_4799_to_fp16, weight = var_4798_to_fp16, x = x_397_cast_fp16)[name = string("linear_175_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_399_cast_fp16 = add(x = x_393_cast_fp16, y = linear_175_cast_fp16)[name = string("x_399_cast_fp16")];
+            tensor<int32, [4]> k_cache_89_begin_0 = const()[name = string("k_cache_89_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_89_end_0 = const()[name = string("k_cache_89_end_0"), val = tensor<int32, [4]>([23, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_89_end_mask_0 = const()[name = string("k_cache_89_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_89_squeeze_mask_0 = const()[name = string("k_cache_89_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_89_cast_fp16 = slice_by_index(begin = k_cache_89_begin_0, end = k_cache_89_end_0, end_mask = k_cache_89_end_mask_0, squeeze_mask = k_cache_89_squeeze_mask_0, x = coreml_update_state_106)[name = string("k_cache_89_cast_fp16")];
+            tensor<int32, [4]> v_cache_89_begin_0 = const()[name = string("v_cache_89_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_89_end_0 = const()[name = string("v_cache_89_end_0"), val = tensor<int32, [4]>([23, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_89_end_mask_0 = const()[name = string("v_cache_89_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_89_squeeze_mask_0 = const()[name = string("v_cache_89_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_89_cast_fp16 = slice_by_index(begin = v_cache_89_begin_0, end = v_cache_89_end_0, end_mask = v_cache_89_end_mask_0, squeeze_mask = v_cache_89_squeeze_mask_0, x = coreml_update_state_107)[name = string("v_cache_89_cast_fp16")];
+            tensor<int32, [4]> k_cache_91_begin_0 = const()[name = string("k_cache_91_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_91_end_0 = const()[name = string("k_cache_91_end_0"), val = tensor<int32, [4]>([23, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_91_end_mask_0 = const()[name = string("k_cache_91_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_91_squeeze_mask_0 = const()[name = string("k_cache_91_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_91_cast_fp16 = slice_by_index(begin = k_cache_91_begin_0, end = k_cache_91_end_0, end_mask = k_cache_91_end_mask_0, squeeze_mask = k_cache_91_squeeze_mask_0, x = read_state_2)[name = string("k_cache_91_cast_fp16")];
+            tensor<int32, [4]> v_cache_91_begin_0 = const()[name = string("v_cache_91_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_91_end_0 = const()[name = string("v_cache_91_end_0"), val = tensor<int32, [4]>([23, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_91_end_mask_0 = const()[name = string("v_cache_91_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_91_squeeze_mask_0 = const()[name = string("v_cache_91_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_91_cast_fp16 = slice_by_index(begin = v_cache_91_begin_0, end = v_cache_91_end_0, end_mask = v_cache_91_end_mask_0, squeeze_mask = v_cache_91_squeeze_mask_0, x = read_state_3)[name = string("v_cache_91_cast_fp16")];
+            int32 var_4822 = const()[name = string("op_4822"), val = int32(-1)];
+            tensor<int32, [1]> var_4840_axes_0 = const()[name = string("op_4840_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148353280)))];
+            tensor<fp16, [1280]> blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148355904)))];
+            fp16 var_4828_to_fp16 = const()[name = string("op_4828_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_4840_cast_fp16 = layer_norm(axes = var_4840_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_399_cast_fp16)[name = string("op_4840_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4851_to_fp16 = const()[name = string("op_4851_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148358528)))];
+            tensor<fp16, [1280]> var_4852_to_fp16 = const()[name = string("op_4852_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151635392)))];
+            tensor<fp16, [1, ?, 1280]> linear_176_cast_fp16 = linear(bias = var_4852_to_fp16, weight = var_4851_to_fp16, x = var_4840_cast_fp16)[name = string("linear_176_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4855_to_fp16 = const()[name = string("op_4855_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151638016)))];
+            tensor<fp16, [1, ?, 1280]> linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4855_to_fp16, x = var_4840_cast_fp16)[name = string("linear_177_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4859_to_fp16 = const()[name = string("op_4859_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1154914880)))];
+            tensor<fp16, [1280]> var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158191744)))];
+            tensor<fp16, [1, ?, 1280]> linear_178_cast_fp16 = linear(bias = var_4860_to_fp16, weight = var_4859_to_fp16, x = var_4840_cast_fp16)[name = string("linear_178_cast_fp16")];
+            tensor<int32, [3]> var_4862_shape_cast_fp16 = shape(x = linear_176_cast_fp16)[name = string("op_4862_shape_cast_fp16")];
+            int32 gather_266_axis_0 = const()[name = string("gather_266_axis_0"), val = int32(0)];
+            int32 gather_266_batch_dims_0 = const()[name = string("gather_266_batch_dims_0"), val = int32(0)];
+            bool gather_266_validate_indices_0 = const()[name = string("gather_266_validate_indices_0"), val = bool(false)];
+            string var_4862_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4862_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_266_to_uint16 = const()[name = string("select_266_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4862_shape_cast_fp16_to_uint16 = cast(dtype = var_4862_shape_cast_fp16_to_uint16_dtype_0, x = var_4862_shape_cast_fp16)[name = string("cast_346")];
+            uint16 gather_266_cast_uint16 = gather(axis = gather_266_axis_0, batch_dims = gather_266_batch_dims_0, indices = select_266_to_uint16, validate_indices = gather_266_validate_indices_0, x = var_4862_shape_cast_fp16_to_uint16)[name = string("gather_266_cast_uint16")];
+            string gather_266_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_266_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_266_cast_uint16_to_int32 = cast(dtype = gather_266_cast_uint16_to_int32_dtype_0, x = gather_266_cast_uint16)[name = string("cast_345")];
+            int32 end_step_47 = add(x = offset, y = gather_266_cast_uint16_to_int32)[name = string("end_step_47")];
+            tensor<int32, [1]> expand_dims_352 = const()[name = string("expand_dims_352"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_354 = const()[name = string("expand_dims_354"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_355_axes_0 = const()[name = string("expand_dims_355_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_355 = expand_dims(axes = expand_dims_355_axes_0, x = end_step_47)[name = string("expand_dims_355")];
+            tensor<int32, [1]> concat_488_values0_0 = const()[name = string("concat_488_values0_0"), val = tensor<int32, [1]>([22])];
+            int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)];
+            bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (concat_488_values0_0, expand_dims_352, expand_dims_1, expand_dims_354))[name = string("concat_488")];
+            tensor<int32, [1]> concat_489_values0_0 = const()[name = string("concat_489_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)];
+            bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (concat_489_values0_0, concat_489_values1_0, expand_dims_355, concat_489_values3_0))[name = string("concat_489")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = k_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = k_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_23_stride_0, update = linear_177_cast_fp16, x = coreml_update_state_106)[name = string("k_cache1_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_23_cast_fp16, input = k_cache1)[name = string("coreml_update_state_108_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_108 = read_state(input = k_cache1)[name = string("coreml_update_state_108")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = v_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = v_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_23_stride_0, update = linear_178_cast_fp16, x = coreml_update_state_107)[name = string("v_cache1_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_23_cast_fp16, input = v_cache1)[name = string("coreml_update_state_109_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_109 = read_state(input = v_cache1)[name = string("coreml_update_state_109")];
+            int32 concat_494_values0_0 = const()[name = string("concat_494_values0_0"), val = int32(1)];
+            int32 concat_494_values2_0 = const()[name = string("concat_494_values2_0"), val = int32(1280)];
+            int32 concat_494_axis_0 = const()[name = string("concat_494_axis_0"), val = int32(0)];
+            bool concat_494_interleave_0 = const()[name = string("concat_494_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_494 = concat(axis = concat_494_axis_0, interleave = concat_494_interleave_0, values = (concat_494_values0_0, end_step_47, concat_494_values2_0))[name = string("concat_494")];
+            tensor<int32, [3]> var_4878_begin_0 = const()[name = string("op_4878_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4878_end_mask_0 = const()[name = string("op_4878_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4878_cast_fp16 = slice_by_index(begin = var_4878_begin_0, end = concat_494, end_mask = var_4878_end_mask_0, x = k_cache_89_cast_fp16)[name = string("op_4878_cast_fp16")];
+            tensor<int32, [3]> var_4881_begin_0 = const()[name = string("op_4881_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4881_end_mask_0 = const()[name = string("op_4881_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_4881_cast_fp16 = slice_by_index(begin = var_4881_begin_0, end = concat_494, end_mask = var_4881_end_mask_0, x = v_cache_89_cast_fp16)[name = string("op_4881_cast_fp16")];
+            tensor<int32, [4]> concat_496x = const()[name = string("concat_496x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4891_cast_fp16 = reshape(shape = concat_496x, x = linear_176_cast_fp16)[name = string("op_4891_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_248_to_fp16 = const()[name = string("const_248_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_179_cast_fp16 = mul(x = var_4891_cast_fp16, y = const_248_to_fp16)[name = string("q_179_cast_fp16")];
+            tensor<int32, [4]> concat_497x = const()[name = string("concat_497x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4898_cast_fp16 = reshape(shape = concat_497x, x = var_4878_cast_fp16)[name = string("op_4898_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_225_cast_fp16 = mul(x = var_4898_cast_fp16, y = const_249_to_fp16)[name = string("k_225_cast_fp16")];
+            tensor<int32, [4]> concat_498x = const()[name = string("concat_498x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4905_cast_fp16 = reshape(shape = concat_498x, x = var_4881_cast_fp16)[name = string("op_4905_cast_fp16")];
+            tensor<int32, [4]> var_4906 = const()[name = string("op_4906"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_133_transpose_x_0 = const()[name = string("qk_133_transpose_x_0"), val = bool(false)];
+            bool qk_133_transpose_y_0 = const()[name = string("qk_133_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_345_perm_0 = const()[name = string("transpose_345_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_346_perm_0 = const()[name = string("transpose_346_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_346 = transpose(perm = transpose_346_perm_0, x = k_225_cast_fp16)[name = string("transpose_462")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_345 = transpose(perm = transpose_345_perm_0, x = q_179_cast_fp16)[name = string("transpose_463")];
+            tensor<fp16, [1, 20, ?, ?]> qk_133_cast_fp16 = matmul(transpose_x = qk_133_transpose_x_0, transpose_y = qk_133_transpose_y_0, x = transpose_345, y = transpose_346)[name = string("qk_133_cast_fp16")];
+            int32 concat_499_values1_0 = const()[name = string("concat_499_values1_0"), val = int32(448)];
+            int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)];
+            bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (gather_266_cast_uint16_to_int32, concat_499_values1_0))[name = string("concat_499")];
+            tensor<int32, [2]> var_4909_begin_0 = const()[name = string("op_4909_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4909_end_mask_0 = const()[name = string("op_4909_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4909_cast_fp16 = slice_by_index(begin = var_4909_begin_0, end = concat_499, end_mask = var_4909_end_mask_0, x = mask_to_fp16)[name = string("op_4909_cast_fp16")];
+            int32 concat_500_values0_0 = const()[name = string("concat_500_values0_0"), val = int32(0)];
+            int32 concat_500_axis_0 = const()[name = string("concat_500_axis_0"), val = int32(0)];
+            bool concat_500_interleave_0 = const()[name = string("concat_500_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_500 = concat(axis = concat_500_axis_0, interleave = concat_500_interleave_0, values = (concat_500_values0_0, gather_266_cast_uint16_to_int32))[name = string("concat_500")];
+            tensor<int32, [2]> var_4910_begin_0 = const()[name = string("op_4910_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4910_end_mask_0 = const()[name = string("op_4910_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4910_cast_fp16 = slice_by_index(begin = var_4910_begin_0, end = concat_500, end_mask = var_4910_end_mask_0, x = var_4909_cast_fp16)[name = string("op_4910_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_135_cast_fp16 = add(x = qk_133_cast_fp16, y = var_4910_cast_fp16)[name = string("qk_135_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_4913_cast_fp16 = softmax(axis = var_4822, x = qk_135_cast_fp16)[name = string("op_4913_cast_fp16")];
+            bool var_4915_transpose_x_0 = const()[name = string("op_4915_transpose_x_0"), val = bool(false)];
+            bool var_4915_transpose_y_0 = const()[name = string("op_4915_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_225_cast_fp16 = transpose(perm = var_4906, x = var_4905_cast_fp16)[name = string("transpose_464")];
+            tensor<fp16, [1, 20, ?, 64]> var_4915_cast_fp16 = matmul(transpose_x = var_4915_transpose_x_0, transpose_y = var_4915_transpose_y_0, x = var_4913_cast_fp16, y = v_225_cast_fp16)[name = string("op_4915_cast_fp16")];
+            tensor<int32, [4]> var_4916 = const()[name = string("op_4916"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_501x = const()[name = string("concat_501x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4917_cast_fp16 = transpose(perm = var_4916, x = var_4915_cast_fp16)[name = string("transpose_461")];
+            tensor<fp16, [1, ?, 1280]> x_403_cast_fp16 = reshape(shape = concat_501x, x = var_4917_cast_fp16)[name = string("x_403_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4921_to_fp16 = const()[name = string("op_4921_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158194368)))];
+            tensor<fp16, [1280]> var_4922_to_fp16 = const()[name = string("op_4922_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161471232)))];
+            tensor<fp16, [1, ?, 1280]> linear_179_cast_fp16 = linear(bias = var_4922_to_fp16, weight = var_4921_to_fp16, x = x_403_cast_fp16)[name = string("linear_179_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_405_cast_fp16 = add(x = x_399_cast_fp16, y = linear_179_cast_fp16)[name = string("x_405_cast_fp16")];
+            tensor<int32, [1]> var_4929_axes_0 = const()[name = string("op_4929_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161473856)))];
+            tensor<fp16, [1280]> blocks_22_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161476480)))];
+            tensor<fp16, [1, ?, 1280]> var_4929_cast_fp16 = layer_norm(axes = var_4929_axes_0, beta = blocks_22_cross_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_cross_attn_ln_weight_to_fp16, x = x_405_cast_fp16)[name = string("op_4929_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161479104)))];
+            tensor<fp16, [1280]> var_4939_to_fp16 = const()[name = string("op_4939_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164755968)))];
+            tensor<fp16, [1, ?, 1280]> linear_180_cast_fp16 = linear(bias = var_4939_to_fp16, weight = var_4938_to_fp16, x = var_4929_cast_fp16)[name = string("linear_180_cast_fp16")];
+            tensor<int32, [3]> concat_502 = const()[name = string("concat_502"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_503 = const()[name = string("concat_503"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_227_internal_tensor_assign_1_stride_0 = const()[name = string("k_227_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_502, begin_mask = k_227_internal_tensor_assign_1_begin_mask_0, end = concat_503, end_mask = k_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_227_internal_tensor_assign_1_squeeze_mask_0, stride = k_227_internal_tensor_assign_1_stride_0, update = k_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("k_227_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_504 = const()[name = string("concat_504"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_505 = const()[name = string("concat_505"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_227_internal_tensor_assign_1_stride_0 = const()[name = string("v_227_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_504, begin_mask = v_227_internal_tensor_assign_1_begin_mask_0, end = concat_505, end_mask = v_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_227_internal_tensor_assign_1_squeeze_mask_0, stride = v_227_internal_tensor_assign_1_stride_0, update = v_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("v_227_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_506x = const()[name = string("concat_506x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_4959_cast_fp16 = reshape(shape = concat_506x, x = linear_180_cast_fp16)[name = string("op_4959_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_183_cast_fp16 = mul(x = var_4959_cast_fp16, y = const_250_to_fp16)[name = string("q_183_cast_fp16")];
+            tensor<int32, [4]> var_4965 = const()[name = string("op_4965"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4966_cast_fp16 = reshape(shape = var_4965, x = k_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4966_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_229_cast_fp16 = mul(x = var_4966_cast_fp16, y = const_251_to_fp16)[name = string("k_229_cast_fp16")];
+            tensor<int32, [4]> var_4972 = const()[name = string("op_4972"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_4973_cast_fp16 = reshape(shape = var_4972, x = v_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4973_cast_fp16")];
+            tensor<int32, [4]> var_4974 = const()[name = string("op_4974"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_137_transpose_x_0 = const()[name = string("qk_137_transpose_x_0"), val = bool(false)];
+            bool qk_137_transpose_y_0 = const()[name = string("qk_137_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_347_perm_0 = const()[name = string("transpose_347_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_348_perm_0 = const()[name = string("transpose_348_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_348 = transpose(perm = transpose_348_perm_0, x = k_229_cast_fp16)[name = string("transpose_458")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_347 = transpose(perm = transpose_347_perm_0, x = q_183_cast_fp16)[name = string("transpose_459")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_137_cast_fp16 = matmul(transpose_x = qk_137_transpose_x_0, transpose_y = qk_137_transpose_y_0, x = transpose_347, y = transpose_348)[name = string("qk_137_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_4978_cast_fp16 = softmax(axis = var_4822, x = qk_137_cast_fp16)[name = string("op_4978_cast_fp16")];
+            bool var_4980_transpose_x_0 = const()[name = string("op_4980_transpose_x_0"), val = bool(false)];
+            bool var_4980_transpose_y_0 = const()[name = string("op_4980_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_229_cast_fp16 = transpose(perm = var_4974, x = var_4973_cast_fp16)[name = string("transpose_460")];
+            tensor<fp16, [1, 20, ?, 64]> var_4980_cast_fp16 = matmul(transpose_x = var_4980_transpose_x_0, transpose_y = var_4980_transpose_y_0, x = var_4978_cast_fp16, y = v_229_cast_fp16)[name = string("op_4980_cast_fp16")];
+            tensor<int32, [4]> var_4981 = const()[name = string("op_4981"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_507x = const()[name = string("concat_507x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_4982_cast_fp16 = transpose(perm = var_4981, x = var_4980_cast_fp16)[name = string("transpose_457")];
+            tensor<fp16, [1, ?, 1280]> x_409_cast_fp16 = reshape(shape = concat_507x, x = var_4982_cast_fp16)[name = string("x_409_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_4986_to_fp16 = const()[name = string("op_4986_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164758592)))];
+            tensor<fp16, [1280]> var_4987_to_fp16 = const()[name = string("op_4987_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168035456)))];
+            tensor<fp16, [1, ?, 1280]> linear_181_cast_fp16 = linear(bias = var_4987_to_fp16, weight = var_4986_to_fp16, x = x_409_cast_fp16)[name = string("linear_181_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_411_cast_fp16 = add(x = x_405_cast_fp16, y = linear_181_cast_fp16)[name = string("x_411_cast_fp16")];
+            tensor<int32, [1]> var_4994_axes_0 = const()[name = string("op_4994_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168038080)))];
+            tensor<fp16, [1280]> blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168040704)))];
+            tensor<fp16, [1, ?, 1280]> var_4994_cast_fp16 = layer_norm(axes = var_4994_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_411_cast_fp16)[name = string("op_4994_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5003_to_fp16 = const()[name = string("op_5003_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168043328)))];
+            tensor<fp16, [5120]> var_5004_to_fp16 = const()[name = string("op_5004_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181150592)))];
+            tensor<fp16, [1, ?, 5120]> linear_182_cast_fp16 = linear(bias = var_5004_to_fp16, weight = var_5003_to_fp16, x = var_4994_cast_fp16)[name = string("linear_182_cast_fp16")];
+            string x_415_mode_0 = const()[name = string("x_415_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_415_cast_fp16 = gelu(mode = x_415_mode_0, x = linear_182_cast_fp16)[name = string("x_415_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5009_to_fp16 = const()[name = string("op_5009_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181160896)))];
+            tensor<fp16, [1280]> var_5010_to_fp16 = const()[name = string("op_5010_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194268160)))];
+            tensor<fp16, [1, ?, 1280]> linear_183_cast_fp16 = linear(bias = var_5010_to_fp16, weight = var_5009_to_fp16, x = x_415_cast_fp16)[name = string("linear_183_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_417_cast_fp16 = add(x = x_411_cast_fp16, y = linear_183_cast_fp16)[name = string("x_417_cast_fp16")];
+            tensor<int32, [4]> k_cache_93_begin_0 = const()[name = string("k_cache_93_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_93_end_0 = const()[name = string("k_cache_93_end_0"), val = tensor<int32, [4]>([24, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_93_end_mask_0 = const()[name = string("k_cache_93_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_93_squeeze_mask_0 = const()[name = string("k_cache_93_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_93_cast_fp16 = slice_by_index(begin = k_cache_93_begin_0, end = k_cache_93_end_0, end_mask = k_cache_93_end_mask_0, squeeze_mask = k_cache_93_squeeze_mask_0, x = coreml_update_state_108)[name = string("k_cache_93_cast_fp16")];
+            tensor<int32, [4]> v_cache_93_begin_0 = const()[name = string("v_cache_93_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_93_end_0 = const()[name = string("v_cache_93_end_0"), val = tensor<int32, [4]>([24, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_93_end_mask_0 = const()[name = string("v_cache_93_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_93_squeeze_mask_0 = const()[name = string("v_cache_93_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_93_cast_fp16 = slice_by_index(begin = v_cache_93_begin_0, end = v_cache_93_end_0, end_mask = v_cache_93_end_mask_0, squeeze_mask = v_cache_93_squeeze_mask_0, x = coreml_update_state_109)[name = string("v_cache_93_cast_fp16")];
+            tensor<int32, [4]> k_cache_95_begin_0 = const()[name = string("k_cache_95_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_95_end_0 = const()[name = string("k_cache_95_end_0"), val = tensor<int32, [4]>([24, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_95_end_mask_0 = const()[name = string("k_cache_95_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_95_squeeze_mask_0 = const()[name = string("k_cache_95_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_95_cast_fp16 = slice_by_index(begin = k_cache_95_begin_0, end = k_cache_95_end_0, end_mask = k_cache_95_end_mask_0, squeeze_mask = k_cache_95_squeeze_mask_0, x = read_state_2)[name = string("k_cache_95_cast_fp16")];
+            tensor<int32, [4]> v_cache_95_begin_0 = const()[name = string("v_cache_95_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_95_end_0 = const()[name = string("v_cache_95_end_0"), val = tensor<int32, [4]>([24, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_95_end_mask_0 = const()[name = string("v_cache_95_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_95_squeeze_mask_0 = const()[name = string("v_cache_95_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_95_cast_fp16 = slice_by_index(begin = v_cache_95_begin_0, end = v_cache_95_end_0, end_mask = v_cache_95_end_mask_0, squeeze_mask = v_cache_95_squeeze_mask_0, x = read_state_3)[name = string("v_cache_95_cast_fp16")];
+            int32 var_5033 = const()[name = string("op_5033"), val = int32(-1)];
+            tensor<int32, [1]> var_5051_axes_0 = const()[name = string("op_5051_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194270784)))];
+            tensor<fp16, [1280]> blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194273408)))];
+            fp16 var_5039_to_fp16 = const()[name = string("op_5039_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5051_cast_fp16 = layer_norm(axes = var_5051_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_417_cast_fp16)[name = string("op_5051_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5062_to_fp16 = const()[name = string("op_5062_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194276032)))];
+            tensor<fp16, [1280]> var_5063_to_fp16 = const()[name = string("op_5063_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197552896)))];
+            tensor<fp16, [1, ?, 1280]> linear_184_cast_fp16 = linear(bias = var_5063_to_fp16, weight = var_5062_to_fp16, x = var_5051_cast_fp16)[name = string("linear_184_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5066_to_fp16 = const()[name = string("op_5066_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197555520)))];
+            tensor<fp16, [1, ?, 1280]> linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5066_to_fp16, x = var_5051_cast_fp16)[name = string("linear_185_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5070_to_fp16 = const()[name = string("op_5070_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1200832384)))];
+            tensor<fp16, [1280]> var_5071_to_fp16 = const()[name = string("op_5071_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204109248)))];
+            tensor<fp16, [1, ?, 1280]> linear_186_cast_fp16 = linear(bias = var_5071_to_fp16, weight = var_5070_to_fp16, x = var_5051_cast_fp16)[name = string("linear_186_cast_fp16")];
+            tensor<int32, [3]> var_5073_shape_cast_fp16 = shape(x = linear_184_cast_fp16)[name = string("op_5073_shape_cast_fp16")];
+            int32 gather_278_axis_0 = const()[name = string("gather_278_axis_0"), val = int32(0)];
+            int32 gather_278_batch_dims_0 = const()[name = string("gather_278_batch_dims_0"), val = int32(0)];
+            bool gather_278_validate_indices_0 = const()[name = string("gather_278_validate_indices_0"), val = bool(false)];
+            string var_5073_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5073_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_278_to_uint16 = const()[name = string("select_278_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5073_shape_cast_fp16_to_uint16 = cast(dtype = var_5073_shape_cast_fp16_to_uint16_dtype_0, x = var_5073_shape_cast_fp16)[name = string("cast_344")];
+            uint16 gather_278_cast_uint16 = gather(axis = gather_278_axis_0, batch_dims = gather_278_batch_dims_0, indices = select_278_to_uint16, validate_indices = gather_278_validate_indices_0, x = var_5073_shape_cast_fp16_to_uint16)[name = string("gather_278_cast_uint16")];
+            string gather_278_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_278_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_278_cast_uint16_to_int32 = cast(dtype = gather_278_cast_uint16_to_int32_dtype_0, x = gather_278_cast_uint16)[name = string("cast_343")];
+            int32 end_step_49 = add(x = offset, y = gather_278_cast_uint16_to_int32)[name = string("end_step_49")];
+            tensor<int32, [1]> expand_dims_368 = const()[name = string("expand_dims_368"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_370 = const()[name = string("expand_dims_370"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_371_axes_0 = const()[name = string("expand_dims_371_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_371 = expand_dims(axes = expand_dims_371_axes_0, x = end_step_49)[name = string("expand_dims_371")];
+            tensor<int32, [1]> concat_510_values0_0 = const()[name = string("concat_510_values0_0"), val = tensor<int32, [1]>([23])];
+            int32 concat_510_axis_0 = const()[name = string("concat_510_axis_0"), val = int32(0)];
+            bool concat_510_interleave_0 = const()[name = string("concat_510_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_510 = concat(axis = concat_510_axis_0, interleave = concat_510_interleave_0, values = (concat_510_values0_0, expand_dims_368, expand_dims_1, expand_dims_370))[name = string("concat_510")];
+            tensor<int32, [1]> concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_511_values3_0 = const()[name = string("concat_511_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)];
+            bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, expand_dims_371, concat_511_values3_0))[name = string("concat_511")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = k_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = k_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_24_stride_0, update = linear_185_cast_fp16, x = coreml_update_state_108)[name = string("k_cache1_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_24_cast_fp16, input = k_cache1)[name = string("coreml_update_state_110_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_110 = read_state(input = k_cache1)[name = string("coreml_update_state_110")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = v_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = v_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_24_stride_0, update = linear_186_cast_fp16, x = coreml_update_state_109)[name = string("v_cache1_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_24_cast_fp16, input = v_cache1)[name = string("coreml_update_state_111_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_111 = read_state(input = v_cache1)[name = string("coreml_update_state_111")];
+            int32 concat_516_values0_0 = const()[name = string("concat_516_values0_0"), val = int32(1)];
+            int32 concat_516_values2_0 = const()[name = string("concat_516_values2_0"), val = int32(1280)];
+            int32 concat_516_axis_0 = const()[name = string("concat_516_axis_0"), val = int32(0)];
+            bool concat_516_interleave_0 = const()[name = string("concat_516_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_516 = concat(axis = concat_516_axis_0, interleave = concat_516_interleave_0, values = (concat_516_values0_0, end_step_49, concat_516_values2_0))[name = string("concat_516")];
+            tensor<int32, [3]> var_5089_begin_0 = const()[name = string("op_5089_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5089_end_mask_0 = const()[name = string("op_5089_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = concat_516, end_mask = var_5089_end_mask_0, x = k_cache_93_cast_fp16)[name = string("op_5089_cast_fp16")];
+            tensor<int32, [3]> var_5092_begin_0 = const()[name = string("op_5092_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5092_end_mask_0 = const()[name = string("op_5092_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5092_cast_fp16 = slice_by_index(begin = var_5092_begin_0, end = concat_516, end_mask = var_5092_end_mask_0, x = v_cache_93_cast_fp16)[name = string("op_5092_cast_fp16")];
+            tensor<int32, [4]> concat_518x = const()[name = string("concat_518x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5102_cast_fp16 = reshape(shape = concat_518x, x = linear_184_cast_fp16)[name = string("op_5102_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_187_cast_fp16 = mul(x = var_5102_cast_fp16, y = const_252_to_fp16)[name = string("q_187_cast_fp16")];
+            tensor<int32, [4]> concat_519x = const()[name = string("concat_519x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5109_cast_fp16 = reshape(shape = concat_519x, x = var_5089_cast_fp16)[name = string("op_5109_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_235_cast_fp16 = mul(x = var_5109_cast_fp16, y = const_253_to_fp16)[name = string("k_235_cast_fp16")];
+            tensor<int32, [4]> concat_520x = const()[name = string("concat_520x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5116_cast_fp16 = reshape(shape = concat_520x, x = var_5092_cast_fp16)[name = string("op_5116_cast_fp16")];
+            tensor<int32, [4]> var_5117 = const()[name = string("op_5117"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_139_transpose_x_0 = const()[name = string("qk_139_transpose_x_0"), val = bool(false)];
+            bool qk_139_transpose_y_0 = const()[name = string("qk_139_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_349_perm_0 = const()[name = string("transpose_349_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_350_perm_0 = const()[name = string("transpose_350_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_350 = transpose(perm = transpose_350_perm_0, x = k_235_cast_fp16)[name = string("transpose_454")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_349 = transpose(perm = transpose_349_perm_0, x = q_187_cast_fp16)[name = string("transpose_455")];
+            tensor<fp16, [1, 20, ?, ?]> qk_139_cast_fp16 = matmul(transpose_x = qk_139_transpose_x_0, transpose_y = qk_139_transpose_y_0, x = transpose_349, y = transpose_350)[name = string("qk_139_cast_fp16")];
+            int32 concat_521_values1_0 = const()[name = string("concat_521_values1_0"), val = int32(448)];
+            int32 concat_521_axis_0 = const()[name = string("concat_521_axis_0"), val = int32(0)];
+            bool concat_521_interleave_0 = const()[name = string("concat_521_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_521 = concat(axis = concat_521_axis_0, interleave = concat_521_interleave_0, values = (gather_278_cast_uint16_to_int32, concat_521_values1_0))[name = string("concat_521")];
+            tensor<int32, [2]> var_5120_begin_0 = const()[name = string("op_5120_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5120_end_mask_0 = const()[name = string("op_5120_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5120_cast_fp16 = slice_by_index(begin = var_5120_begin_0, end = concat_521, end_mask = var_5120_end_mask_0, x = mask_to_fp16)[name = string("op_5120_cast_fp16")];
+            int32 concat_522_values0_0 = const()[name = string("concat_522_values0_0"), val = int32(0)];
+            int32 concat_522_axis_0 = const()[name = string("concat_522_axis_0"), val = int32(0)];
+            bool concat_522_interleave_0 = const()[name = string("concat_522_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_522 = concat(axis = concat_522_axis_0, interleave = concat_522_interleave_0, values = (concat_522_values0_0, gather_278_cast_uint16_to_int32))[name = string("concat_522")];
+            tensor<int32, [2]> var_5121_begin_0 = const()[name = string("op_5121_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5121_end_mask_0 = const()[name = string("op_5121_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5121_cast_fp16 = slice_by_index(begin = var_5121_begin_0, end = concat_522, end_mask = var_5121_end_mask_0, x = var_5120_cast_fp16)[name = string("op_5121_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_141_cast_fp16 = add(x = qk_139_cast_fp16, y = var_5121_cast_fp16)[name = string("qk_141_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5124_cast_fp16 = softmax(axis = var_5033, x = qk_141_cast_fp16)[name = string("op_5124_cast_fp16")];
+            bool var_5126_transpose_x_0 = const()[name = string("op_5126_transpose_x_0"), val = bool(false)];
+            bool var_5126_transpose_y_0 = const()[name = string("op_5126_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_235_cast_fp16 = transpose(perm = var_5117, x = var_5116_cast_fp16)[name = string("transpose_456")];
+            tensor<fp16, [1, 20, ?, 64]> var_5126_cast_fp16 = matmul(transpose_x = var_5126_transpose_x_0, transpose_y = var_5126_transpose_y_0, x = var_5124_cast_fp16, y = v_235_cast_fp16)[name = string("op_5126_cast_fp16")];
+            tensor<int32, [4]> var_5127 = const()[name = string("op_5127"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_523x = const()[name = string("concat_523x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5128_cast_fp16 = transpose(perm = var_5127, x = var_5126_cast_fp16)[name = string("transpose_453")];
+            tensor<fp16, [1, ?, 1280]> x_421_cast_fp16 = reshape(shape = concat_523x, x = var_5128_cast_fp16)[name = string("x_421_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5132_to_fp16 = const()[name = string("op_5132_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204111872)))];
+            tensor<fp16, [1280]> var_5133_to_fp16 = const()[name = string("op_5133_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207388736)))];
+            tensor<fp16, [1, ?, 1280]> linear_187_cast_fp16 = linear(bias = var_5133_to_fp16, weight = var_5132_to_fp16, x = x_421_cast_fp16)[name = string("linear_187_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_423_cast_fp16 = add(x = x_417_cast_fp16, y = linear_187_cast_fp16)[name = string("x_423_cast_fp16")];
+            tensor<int32, [1]> var_5140_axes_0 = const()[name = string("op_5140_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207391360)))];
+            tensor<fp16, [1280]> blocks_23_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207393984)))];
+            tensor<fp16, [1, ?, 1280]> var_5140_cast_fp16 = layer_norm(axes = var_5140_axes_0, beta = blocks_23_cross_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_cross_attn_ln_weight_to_fp16, x = x_423_cast_fp16)[name = string("op_5140_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5149_to_fp16 = const()[name = string("op_5149_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207396608)))];
+            tensor<fp16, [1280]> var_5150_to_fp16 = const()[name = string("op_5150_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210673472)))];
+            tensor<fp16, [1, ?, 1280]> linear_188_cast_fp16 = linear(bias = var_5150_to_fp16, weight = var_5149_to_fp16, x = var_5140_cast_fp16)[name = string("linear_188_cast_fp16")];
+            tensor<int32, [3]> concat_524 = const()[name = string("concat_524"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_525 = const()[name = string("concat_525"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_237_internal_tensor_assign_1_stride_0 = const()[name = string("k_237_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_524, begin_mask = k_237_internal_tensor_assign_1_begin_mask_0, end = concat_525, end_mask = k_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_237_internal_tensor_assign_1_squeeze_mask_0, stride = k_237_internal_tensor_assign_1_stride_0, update = k_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("k_237_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_526 = const()[name = string("concat_526"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_527 = const()[name = string("concat_527"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_237_internal_tensor_assign_1_stride_0 = const()[name = string("v_237_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_526, begin_mask = v_237_internal_tensor_assign_1_begin_mask_0, end = concat_527, end_mask = v_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_237_internal_tensor_assign_1_squeeze_mask_0, stride = v_237_internal_tensor_assign_1_stride_0, update = v_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("v_237_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_528x = const()[name = string("concat_528x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5170_cast_fp16 = reshape(shape = concat_528x, x = linear_188_cast_fp16)[name = string("op_5170_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_254_to_fp16 = const()[name = string("const_254_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_191_cast_fp16 = mul(x = var_5170_cast_fp16, y = const_254_to_fp16)[name = string("q_191_cast_fp16")];
+            tensor<int32, [4]> var_5176 = const()[name = string("op_5176"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5177_cast_fp16 = reshape(shape = var_5176, x = k_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5177_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_255_to_fp16 = const()[name = string("const_255_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_239_cast_fp16 = mul(x = var_5177_cast_fp16, y = const_255_to_fp16)[name = string("k_239_cast_fp16")];
+            tensor<int32, [4]> var_5183 = const()[name = string("op_5183"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5184_cast_fp16 = reshape(shape = var_5183, x = v_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5184_cast_fp16")];
+            tensor<int32, [4]> var_5185 = const()[name = string("op_5185"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_143_transpose_x_0 = const()[name = string("qk_143_transpose_x_0"), val = bool(false)];
+            bool qk_143_transpose_y_0 = const()[name = string("qk_143_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_351_perm_0 = const()[name = string("transpose_351_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_352_perm_0 = const()[name = string("transpose_352_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_352 = transpose(perm = transpose_352_perm_0, x = k_239_cast_fp16)[name = string("transpose_450")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_351 = transpose(perm = transpose_351_perm_0, x = q_191_cast_fp16)[name = string("transpose_451")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_143_cast_fp16 = matmul(transpose_x = qk_143_transpose_x_0, transpose_y = qk_143_transpose_y_0, x = transpose_351, y = transpose_352)[name = string("qk_143_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_5189_cast_fp16 = softmax(axis = var_5033, x = qk_143_cast_fp16)[name = string("op_5189_cast_fp16")];
+            bool var_5191_transpose_x_0 = const()[name = string("op_5191_transpose_x_0"), val = bool(false)];
+            bool var_5191_transpose_y_0 = const()[name = string("op_5191_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_239_cast_fp16 = transpose(perm = var_5185, x = var_5184_cast_fp16)[name = string("transpose_452")];
+            tensor<fp16, [1, 20, ?, 64]> var_5191_cast_fp16 = matmul(transpose_x = var_5191_transpose_x_0, transpose_y = var_5191_transpose_y_0, x = var_5189_cast_fp16, y = v_239_cast_fp16)[name = string("op_5191_cast_fp16")];
+            tensor<int32, [4]> var_5192 = const()[name = string("op_5192"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_529x = const()[name = string("concat_529x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5193_cast_fp16 = transpose(perm = var_5192, x = var_5191_cast_fp16)[name = string("transpose_449")];
+            tensor<fp16, [1, ?, 1280]> x_427_cast_fp16 = reshape(shape = concat_529x, x = var_5193_cast_fp16)[name = string("x_427_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5197_to_fp16 = const()[name = string("op_5197_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210676096)))];
+            tensor<fp16, [1280]> var_5198_to_fp16 = const()[name = string("op_5198_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213952960)))];
+            tensor<fp16, [1, ?, 1280]> linear_189_cast_fp16 = linear(bias = var_5198_to_fp16, weight = var_5197_to_fp16, x = x_427_cast_fp16)[name = string("linear_189_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_429_cast_fp16 = add(x = x_423_cast_fp16, y = linear_189_cast_fp16)[name = string("x_429_cast_fp16")];
+            tensor<int32, [1]> var_5205_axes_0 = const()[name = string("op_5205_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213955584)))];
+            tensor<fp16, [1280]> blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213958208)))];
+            tensor<fp16, [1, ?, 1280]> var_5205_cast_fp16 = layer_norm(axes = var_5205_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_429_cast_fp16)[name = string("op_5205_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5214_to_fp16 = const()[name = string("op_5214_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213960832)))];
+            tensor<fp16, [5120]> var_5215_to_fp16 = const()[name = string("op_5215_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227068096)))];
+            tensor<fp16, [1, ?, 5120]> linear_190_cast_fp16 = linear(bias = var_5215_to_fp16, weight = var_5214_to_fp16, x = var_5205_cast_fp16)[name = string("linear_190_cast_fp16")];
+            string x_433_mode_0 = const()[name = string("x_433_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_433_cast_fp16 = gelu(mode = x_433_mode_0, x = linear_190_cast_fp16)[name = string("x_433_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5220_to_fp16 = const()[name = string("op_5220_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227078400)))];
+            tensor<fp16, [1280]> var_5221_to_fp16 = const()[name = string("op_5221_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240185664)))];
+            tensor<fp16, [1, ?, 1280]> linear_191_cast_fp16 = linear(bias = var_5221_to_fp16, weight = var_5220_to_fp16, x = x_433_cast_fp16)[name = string("linear_191_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_435_cast_fp16 = add(x = x_429_cast_fp16, y = linear_191_cast_fp16)[name = string("x_435_cast_fp16")];
+            tensor<int32, [4]> k_cache_97_begin_0 = const()[name = string("k_cache_97_begin_0"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_97_end_0 = const()[name = string("k_cache_97_end_0"), val = tensor<int32, [4]>([25, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_97_end_mask_0 = const()[name = string("k_cache_97_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_97_squeeze_mask_0 = const()[name = string("k_cache_97_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_97_cast_fp16 = slice_by_index(begin = k_cache_97_begin_0, end = k_cache_97_end_0, end_mask = k_cache_97_end_mask_0, squeeze_mask = k_cache_97_squeeze_mask_0, x = coreml_update_state_110)[name = string("k_cache_97_cast_fp16")];
+            tensor<int32, [4]> v_cache_97_begin_0 = const()[name = string("v_cache_97_begin_0"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_97_end_0 = const()[name = string("v_cache_97_end_0"), val = tensor<int32, [4]>([25, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_97_end_mask_0 = const()[name = string("v_cache_97_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_97_squeeze_mask_0 = const()[name = string("v_cache_97_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_97_cast_fp16 = slice_by_index(begin = v_cache_97_begin_0, end = v_cache_97_end_0, end_mask = v_cache_97_end_mask_0, squeeze_mask = v_cache_97_squeeze_mask_0, x = coreml_update_state_111)[name = string("v_cache_97_cast_fp16")];
+            tensor<int32, [4]> k_cache_99_begin_0 = const()[name = string("k_cache_99_begin_0"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_99_end_0 = const()[name = string("k_cache_99_end_0"), val = tensor<int32, [4]>([25, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_99_end_mask_0 = const()[name = string("k_cache_99_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_99_squeeze_mask_0 = const()[name = string("k_cache_99_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_99_cast_fp16 = slice_by_index(begin = k_cache_99_begin_0, end = k_cache_99_end_0, end_mask = k_cache_99_end_mask_0, squeeze_mask = k_cache_99_squeeze_mask_0, x = read_state_2)[name = string("k_cache_99_cast_fp16")];
+            tensor<int32, [4]> v_cache_99_begin_0 = const()[name = string("v_cache_99_begin_0"), val = tensor<int32, [4]>([24, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_99_end_0 = const()[name = string("v_cache_99_end_0"), val = tensor<int32, [4]>([25, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_99_end_mask_0 = const()[name = string("v_cache_99_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_99_squeeze_mask_0 = const()[name = string("v_cache_99_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_99_cast_fp16 = slice_by_index(begin = v_cache_99_begin_0, end = v_cache_99_end_0, end_mask = v_cache_99_end_mask_0, squeeze_mask = v_cache_99_squeeze_mask_0, x = read_state_3)[name = string("v_cache_99_cast_fp16")];
+            int32 var_5244 = const()[name = string("op_5244"), val = int32(-1)];
+            tensor<int32, [1]> var_5262_axes_0 = const()[name = string("op_5262_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240188288)))];
+            tensor<fp16, [1280]> blocks_24_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240190912)))];
+            fp16 var_5250_to_fp16 = const()[name = string("op_5250_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5262_cast_fp16 = layer_norm(axes = var_5262_axes_0, beta = blocks_24_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_attn_ln_weight_to_fp16, x = x_435_cast_fp16)[name = string("op_5262_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5273_to_fp16 = const()[name = string("op_5273_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240193536)))];
+            tensor<fp16, [1280]> var_5274_to_fp16 = const()[name = string("op_5274_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243470400)))];
+            tensor<fp16, [1, ?, 1280]> linear_192_cast_fp16 = linear(bias = var_5274_to_fp16, weight = var_5273_to_fp16, x = var_5262_cast_fp16)[name = string("linear_192_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5277_to_fp16 = const()[name = string("op_5277_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243473024)))];
+            tensor<fp16, [1, ?, 1280]> linear_193_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5277_to_fp16, x = var_5262_cast_fp16)[name = string("linear_193_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5281_to_fp16 = const()[name = string("op_5281_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1246749888)))];
+            tensor<fp16, [1280]> var_5282_to_fp16 = const()[name = string("op_5282_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250026752)))];
+            tensor<fp16, [1, ?, 1280]> linear_194_cast_fp16 = linear(bias = var_5282_to_fp16, weight = var_5281_to_fp16, x = var_5262_cast_fp16)[name = string("linear_194_cast_fp16")];
+            tensor<int32, [3]> var_5284_shape_cast_fp16 = shape(x = linear_192_cast_fp16)[name = string("op_5284_shape_cast_fp16")];
+            int32 gather_290_axis_0 = const()[name = string("gather_290_axis_0"), val = int32(0)];
+            int32 gather_290_batch_dims_0 = const()[name = string("gather_290_batch_dims_0"), val = int32(0)];
+            bool gather_290_validate_indices_0 = const()[name = string("gather_290_validate_indices_0"), val = bool(false)];
+            string var_5284_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5284_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_290_to_uint16 = const()[name = string("select_290_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5284_shape_cast_fp16_to_uint16 = cast(dtype = var_5284_shape_cast_fp16_to_uint16_dtype_0, x = var_5284_shape_cast_fp16)[name = string("cast_342")];
+            uint16 gather_290_cast_uint16 = gather(axis = gather_290_axis_0, batch_dims = gather_290_batch_dims_0, indices = select_290_to_uint16, validate_indices = gather_290_validate_indices_0, x = var_5284_shape_cast_fp16_to_uint16)[name = string("gather_290_cast_uint16")];
+            string gather_290_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_290_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_290_cast_uint16_to_int32 = cast(dtype = gather_290_cast_uint16_to_int32_dtype_0, x = gather_290_cast_uint16)[name = string("cast_341")];
+            int32 end_step_51 = add(x = offset, y = gather_290_cast_uint16_to_int32)[name = string("end_step_51")];
+            tensor<int32, [1]> expand_dims_384 = const()[name = string("expand_dims_384"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_386 = const()[name = string("expand_dims_386"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_387_axes_0 = const()[name = string("expand_dims_387_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_387 = expand_dims(axes = expand_dims_387_axes_0, x = end_step_51)[name = string("expand_dims_387")];
+            tensor<int32, [1]> concat_532_values0_0 = const()[name = string("concat_532_values0_0"), val = tensor<int32, [1]>([24])];
+            int32 concat_532_axis_0 = const()[name = string("concat_532_axis_0"), val = int32(0)];
+            bool concat_532_interleave_0 = const()[name = string("concat_532_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_532 = concat(axis = concat_532_axis_0, interleave = concat_532_interleave_0, values = (concat_532_values0_0, expand_dims_384, expand_dims_1, expand_dims_386))[name = string("concat_532")];
+            tensor<int32, [1]> concat_533_values0_0 = const()[name = string("concat_533_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_533_values1_0 = const()[name = string("concat_533_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_533_values3_0 = const()[name = string("concat_533_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_533_axis_0 = const()[name = string("concat_533_axis_0"), val = int32(0)];
+            bool concat_533_interleave_0 = const()[name = string("concat_533_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_533 = concat(axis = concat_533_axis_0, interleave = concat_533_interleave_0, values = (concat_533_values0_0, concat_533_values1_0, expand_dims_387, concat_533_values3_0))[name = string("concat_533")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_25_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = k_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = k_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_25_stride_0, update = linear_193_cast_fp16, x = coreml_update_state_110)[name = string("k_cache1_internal_tensor_assign_25_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_25_cast_fp16, input = k_cache1)[name = string("coreml_update_state_112_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_112 = read_state(input = k_cache1)[name = string("coreml_update_state_112")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_25_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = v_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = v_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_25_stride_0, update = linear_194_cast_fp16, x = coreml_update_state_111)[name = string("v_cache1_internal_tensor_assign_25_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_25_cast_fp16, input = v_cache1)[name = string("coreml_update_state_113_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_113 = read_state(input = v_cache1)[name = string("coreml_update_state_113")];
+            int32 concat_538_values0_0 = const()[name = string("concat_538_values0_0"), val = int32(1)];
+            int32 concat_538_values2_0 = const()[name = string("concat_538_values2_0"), val = int32(1280)];
+            int32 concat_538_axis_0 = const()[name = string("concat_538_axis_0"), val = int32(0)];
+            bool concat_538_interleave_0 = const()[name = string("concat_538_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_538 = concat(axis = concat_538_axis_0, interleave = concat_538_interleave_0, values = (concat_538_values0_0, end_step_51, concat_538_values2_0))[name = string("concat_538")];
+            tensor<int32, [3]> var_5300_begin_0 = const()[name = string("op_5300_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5300_end_mask_0 = const()[name = string("op_5300_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5300_cast_fp16 = slice_by_index(begin = var_5300_begin_0, end = concat_538, end_mask = var_5300_end_mask_0, x = k_cache_97_cast_fp16)[name = string("op_5300_cast_fp16")];
+            tensor<int32, [3]> var_5303_begin_0 = const()[name = string("op_5303_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5303_end_mask_0 = const()[name = string("op_5303_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5303_cast_fp16 = slice_by_index(begin = var_5303_begin_0, end = concat_538, end_mask = var_5303_end_mask_0, x = v_cache_97_cast_fp16)[name = string("op_5303_cast_fp16")];
+            tensor<int32, [4]> concat_540x = const()[name = string("concat_540x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5313_cast_fp16 = reshape(shape = concat_540x, x = linear_192_cast_fp16)[name = string("op_5313_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_195_cast_fp16 = mul(x = var_5313_cast_fp16, y = const_256_to_fp16)[name = string("q_195_cast_fp16")];
+            tensor<int32, [4]> concat_541x = const()[name = string("concat_541x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5320_cast_fp16 = reshape(shape = concat_541x, x = var_5300_cast_fp16)[name = string("op_5320_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_257_to_fp16 = const()[name = string("const_257_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_245_cast_fp16 = mul(x = var_5320_cast_fp16, y = const_257_to_fp16)[name = string("k_245_cast_fp16")];
+            tensor<int32, [4]> concat_542x = const()[name = string("concat_542x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5327_cast_fp16 = reshape(shape = concat_542x, x = var_5303_cast_fp16)[name = string("op_5327_cast_fp16")];
+            tensor<int32, [4]> var_5328 = const()[name = string("op_5328"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_145_transpose_x_0 = const()[name = string("qk_145_transpose_x_0"), val = bool(false)];
+            bool qk_145_transpose_y_0 = const()[name = string("qk_145_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_353_perm_0 = const()[name = string("transpose_353_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_354_perm_0 = const()[name = string("transpose_354_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_354 = transpose(perm = transpose_354_perm_0, x = k_245_cast_fp16)[name = string("transpose_446")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_353 = transpose(perm = transpose_353_perm_0, x = q_195_cast_fp16)[name = string("transpose_447")];
+            tensor<fp16, [1, 20, ?, ?]> qk_145_cast_fp16 = matmul(transpose_x = qk_145_transpose_x_0, transpose_y = qk_145_transpose_y_0, x = transpose_353, y = transpose_354)[name = string("qk_145_cast_fp16")];
+            int32 concat_543_values1_0 = const()[name = string("concat_543_values1_0"), val = int32(448)];
+            int32 concat_543_axis_0 = const()[name = string("concat_543_axis_0"), val = int32(0)];
+            bool concat_543_interleave_0 = const()[name = string("concat_543_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_543 = concat(axis = concat_543_axis_0, interleave = concat_543_interleave_0, values = (gather_290_cast_uint16_to_int32, concat_543_values1_0))[name = string("concat_543")];
+            tensor<int32, [2]> var_5331_begin_0 = const()[name = string("op_5331_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5331_end_mask_0 = const()[name = string("op_5331_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5331_cast_fp16 = slice_by_index(begin = var_5331_begin_0, end = concat_543, end_mask = var_5331_end_mask_0, x = mask_to_fp16)[name = string("op_5331_cast_fp16")];
+            int32 concat_544_values0_0 = const()[name = string("concat_544_values0_0"), val = int32(0)];
+            int32 concat_544_axis_0 = const()[name = string("concat_544_axis_0"), val = int32(0)];
+            bool concat_544_interleave_0 = const()[name = string("concat_544_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_544 = concat(axis = concat_544_axis_0, interleave = concat_544_interleave_0, values = (concat_544_values0_0, gather_290_cast_uint16_to_int32))[name = string("concat_544")];
+            tensor<int32, [2]> var_5332_begin_0 = const()[name = string("op_5332_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5332_end_mask_0 = const()[name = string("op_5332_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5332_cast_fp16 = slice_by_index(begin = var_5332_begin_0, end = concat_544, end_mask = var_5332_end_mask_0, x = var_5331_cast_fp16)[name = string("op_5332_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_147_cast_fp16 = add(x = qk_145_cast_fp16, y = var_5332_cast_fp16)[name = string("qk_147_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5335_cast_fp16 = softmax(axis = var_5244, x = qk_147_cast_fp16)[name = string("op_5335_cast_fp16")];
+            bool var_5337_transpose_x_0 = const()[name = string("op_5337_transpose_x_0"), val = bool(false)];
+            bool var_5337_transpose_y_0 = const()[name = string("op_5337_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_245_cast_fp16 = transpose(perm = var_5328, x = var_5327_cast_fp16)[name = string("transpose_448")];
+            tensor<fp16, [1, 20, ?, 64]> var_5337_cast_fp16 = matmul(transpose_x = var_5337_transpose_x_0, transpose_y = var_5337_transpose_y_0, x = var_5335_cast_fp16, y = v_245_cast_fp16)[name = string("op_5337_cast_fp16")];
+            tensor<int32, [4]> var_5338 = const()[name = string("op_5338"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_545x = const()[name = string("concat_545x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5339_cast_fp16 = transpose(perm = var_5338, x = var_5337_cast_fp16)[name = string("transpose_445")];
+            tensor<fp16, [1, ?, 1280]> x_439_cast_fp16 = reshape(shape = concat_545x, x = var_5339_cast_fp16)[name = string("x_439_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5343_to_fp16 = const()[name = string("op_5343_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250029376)))];
+            tensor<fp16, [1280]> var_5344_to_fp16 = const()[name = string("op_5344_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253306240)))];
+            tensor<fp16, [1, ?, 1280]> linear_195_cast_fp16 = linear(bias = var_5344_to_fp16, weight = var_5343_to_fp16, x = x_439_cast_fp16)[name = string("linear_195_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_441_cast_fp16 = add(x = x_435_cast_fp16, y = linear_195_cast_fp16)[name = string("x_441_cast_fp16")];
+            tensor<int32, [1]> var_5351_axes_0 = const()[name = string("op_5351_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253308864)))];
+            tensor<fp16, [1280]> blocks_24_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253311488)))];
+            tensor<fp16, [1, ?, 1280]> var_5351_cast_fp16 = layer_norm(axes = var_5351_axes_0, beta = blocks_24_cross_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_cross_attn_ln_weight_to_fp16, x = x_441_cast_fp16)[name = string("op_5351_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5360_to_fp16 = const()[name = string("op_5360_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253314112)))];
+            tensor<fp16, [1280]> var_5361_to_fp16 = const()[name = string("op_5361_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256590976)))];
+            tensor<fp16, [1, ?, 1280]> linear_196_cast_fp16 = linear(bias = var_5361_to_fp16, weight = var_5360_to_fp16, x = var_5351_cast_fp16)[name = string("linear_196_cast_fp16")];
+            tensor<int32, [3]> concat_546 = const()[name = string("concat_546"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_547 = const()[name = string("concat_547"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_247_internal_tensor_assign_1_stride_0 = const()[name = string("k_247_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_546, begin_mask = k_247_internal_tensor_assign_1_begin_mask_0, end = concat_547, end_mask = k_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_247_internal_tensor_assign_1_squeeze_mask_0, stride = k_247_internal_tensor_assign_1_stride_0, update = k_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("k_247_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_548 = const()[name = string("concat_548"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_549 = const()[name = string("concat_549"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_247_internal_tensor_assign_1_stride_0 = const()[name = string("v_247_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_548, begin_mask = v_247_internal_tensor_assign_1_begin_mask_0, end = concat_549, end_mask = v_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_247_internal_tensor_assign_1_squeeze_mask_0, stride = v_247_internal_tensor_assign_1_stride_0, update = v_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("v_247_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_550x = const()[name = string("concat_550x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5381_cast_fp16 = reshape(shape = concat_550x, x = linear_196_cast_fp16)[name = string("op_5381_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_199_cast_fp16 = mul(x = var_5381_cast_fp16, y = const_258_to_fp16)[name = string("q_199_cast_fp16")];
+            tensor<int32, [4]> var_5387 = const()[name = string("op_5387"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5388_cast_fp16 = reshape(shape = var_5387, x = k_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5388_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_249_cast_fp16 = mul(x = var_5388_cast_fp16, y = const_259_to_fp16)[name = string("k_249_cast_fp16")];
+            tensor<int32, [4]> var_5394 = const()[name = string("op_5394"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5395_cast_fp16 = reshape(shape = var_5394, x = v_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5395_cast_fp16")];
+            tensor<int32, [4]> var_5396 = const()[name = string("op_5396"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_149_transpose_x_0 = const()[name = string("qk_149_transpose_x_0"), val = bool(false)];
+            bool qk_149_transpose_y_0 = const()[name = string("qk_149_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_355_perm_0 = const()[name = string("transpose_355_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_356_perm_0 = const()[name = string("transpose_356_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_356 = transpose(perm = transpose_356_perm_0, x = k_249_cast_fp16)[name = string("transpose_442")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_355 = transpose(perm = transpose_355_perm_0, x = q_199_cast_fp16)[name = string("transpose_443")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_149_cast_fp16 = matmul(transpose_x = qk_149_transpose_x_0, transpose_y = qk_149_transpose_y_0, x = transpose_355, y = transpose_356)[name = string("qk_149_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_5400_cast_fp16 = softmax(axis = var_5244, x = qk_149_cast_fp16)[name = string("op_5400_cast_fp16")];
+            bool var_5402_transpose_x_0 = const()[name = string("op_5402_transpose_x_0"), val = bool(false)];
+            bool var_5402_transpose_y_0 = const()[name = string("op_5402_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_249_cast_fp16 = transpose(perm = var_5396, x = var_5395_cast_fp16)[name = string("transpose_444")];
+            tensor<fp16, [1, 20, ?, 64]> var_5402_cast_fp16 = matmul(transpose_x = var_5402_transpose_x_0, transpose_y = var_5402_transpose_y_0, x = var_5400_cast_fp16, y = v_249_cast_fp16)[name = string("op_5402_cast_fp16")];
+            tensor<int32, [4]> var_5403 = const()[name = string("op_5403"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_551x = const()[name = string("concat_551x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5404_cast_fp16 = transpose(perm = var_5403, x = var_5402_cast_fp16)[name = string("transpose_441")];
+            tensor<fp16, [1, ?, 1280]> x_445_cast_fp16 = reshape(shape = concat_551x, x = var_5404_cast_fp16)[name = string("x_445_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5408_to_fp16 = const()[name = string("op_5408_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256593600)))];
+            tensor<fp16, [1280]> var_5409_to_fp16 = const()[name = string("op_5409_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259870464)))];
+            tensor<fp16, [1, ?, 1280]> linear_197_cast_fp16 = linear(bias = var_5409_to_fp16, weight = var_5408_to_fp16, x = x_445_cast_fp16)[name = string("linear_197_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_447_cast_fp16 = add(x = x_441_cast_fp16, y = linear_197_cast_fp16)[name = string("x_447_cast_fp16")];
+            tensor<int32, [1]> var_5416_axes_0 = const()[name = string("op_5416_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_mlp_ln_weight_to_fp16 = const()[name = string("blocks_24_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259873088)))];
+            tensor<fp16, [1280]> blocks_24_mlp_ln_bias_to_fp16 = const()[name = string("blocks_24_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259875712)))];
+            tensor<fp16, [1, ?, 1280]> var_5416_cast_fp16 = layer_norm(axes = var_5416_axes_0, beta = blocks_24_mlp_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_mlp_ln_weight_to_fp16, x = x_447_cast_fp16)[name = string("op_5416_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5425_to_fp16 = const()[name = string("op_5425_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259878336)))];
+            tensor<fp16, [5120]> var_5426_to_fp16 = const()[name = string("op_5426_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272985600)))];
+            tensor<fp16, [1, ?, 5120]> linear_198_cast_fp16 = linear(bias = var_5426_to_fp16, weight = var_5425_to_fp16, x = var_5416_cast_fp16)[name = string("linear_198_cast_fp16")];
+            string x_451_mode_0 = const()[name = string("x_451_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_451_cast_fp16 = gelu(mode = x_451_mode_0, x = linear_198_cast_fp16)[name = string("x_451_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5431_to_fp16 = const()[name = string("op_5431_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272995904)))];
+            tensor<fp16, [1280]> var_5432_to_fp16 = const()[name = string("op_5432_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286103168)))];
+            tensor<fp16, [1, ?, 1280]> linear_199_cast_fp16 = linear(bias = var_5432_to_fp16, weight = var_5431_to_fp16, x = x_451_cast_fp16)[name = string("linear_199_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_453_cast_fp16 = add(x = x_447_cast_fp16, y = linear_199_cast_fp16)[name = string("x_453_cast_fp16")];
+            tensor<int32, [4]> k_cache_101_begin_0 = const()[name = string("k_cache_101_begin_0"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_101_end_0 = const()[name = string("k_cache_101_end_0"), val = tensor<int32, [4]>([26, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_101_end_mask_0 = const()[name = string("k_cache_101_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_101_squeeze_mask_0 = const()[name = string("k_cache_101_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_101_cast_fp16 = slice_by_index(begin = k_cache_101_begin_0, end = k_cache_101_end_0, end_mask = k_cache_101_end_mask_0, squeeze_mask = k_cache_101_squeeze_mask_0, x = coreml_update_state_112)[name = string("k_cache_101_cast_fp16")];
+            tensor<int32, [4]> v_cache_101_begin_0 = const()[name = string("v_cache_101_begin_0"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_101_end_0 = const()[name = string("v_cache_101_end_0"), val = tensor<int32, [4]>([26, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_101_end_mask_0 = const()[name = string("v_cache_101_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_101_squeeze_mask_0 = const()[name = string("v_cache_101_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_101_cast_fp16 = slice_by_index(begin = v_cache_101_begin_0, end = v_cache_101_end_0, end_mask = v_cache_101_end_mask_0, squeeze_mask = v_cache_101_squeeze_mask_0, x = coreml_update_state_113)[name = string("v_cache_101_cast_fp16")];
+            tensor<int32, [4]> k_cache_103_begin_0 = const()[name = string("k_cache_103_begin_0"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_103_end_0 = const()[name = string("k_cache_103_end_0"), val = tensor<int32, [4]>([26, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_103_end_mask_0 = const()[name = string("k_cache_103_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_103_squeeze_mask_0 = const()[name = string("k_cache_103_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_103_cast_fp16 = slice_by_index(begin = k_cache_103_begin_0, end = k_cache_103_end_0, end_mask = k_cache_103_end_mask_0, squeeze_mask = k_cache_103_squeeze_mask_0, x = read_state_2)[name = string("k_cache_103_cast_fp16")];
+            tensor<int32, [4]> v_cache_103_begin_0 = const()[name = string("v_cache_103_begin_0"), val = tensor<int32, [4]>([25, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_103_end_0 = const()[name = string("v_cache_103_end_0"), val = tensor<int32, [4]>([26, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_103_end_mask_0 = const()[name = string("v_cache_103_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_103_squeeze_mask_0 = const()[name = string("v_cache_103_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_103_cast_fp16 = slice_by_index(begin = v_cache_103_begin_0, end = v_cache_103_end_0, end_mask = v_cache_103_end_mask_0, squeeze_mask = v_cache_103_squeeze_mask_0, x = read_state_3)[name = string("v_cache_103_cast_fp16")];
+            int32 var_5455 = const()[name = string("op_5455"), val = int32(-1)];
+            tensor<int32, [1]> var_5473_axes_0 = const()[name = string("op_5473_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286105792)))];
+            tensor<fp16, [1280]> blocks_25_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286108416)))];
+            fp16 var_5461_to_fp16 = const()[name = string("op_5461_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5473_cast_fp16 = layer_norm(axes = var_5473_axes_0, beta = blocks_25_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_attn_ln_weight_to_fp16, x = x_453_cast_fp16)[name = string("op_5473_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5484_to_fp16 = const()[name = string("op_5484_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286111040)))];
+            tensor<fp16, [1280]> var_5485_to_fp16 = const()[name = string("op_5485_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289387904)))];
+            tensor<fp16, [1, ?, 1280]> linear_200_cast_fp16 = linear(bias = var_5485_to_fp16, weight = var_5484_to_fp16, x = var_5473_cast_fp16)[name = string("linear_200_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5488_to_fp16 = const()[name = string("op_5488_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289390528)))];
+            tensor<fp16, [1, ?, 1280]> linear_201_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5488_to_fp16, x = var_5473_cast_fp16)[name = string("linear_201_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5492_to_fp16 = const()[name = string("op_5492_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1292667392)))];
+            tensor<fp16, [1280]> var_5493_to_fp16 = const()[name = string("op_5493_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295944256)))];
+            tensor<fp16, [1, ?, 1280]> linear_202_cast_fp16 = linear(bias = var_5493_to_fp16, weight = var_5492_to_fp16, x = var_5473_cast_fp16)[name = string("linear_202_cast_fp16")];
+            tensor<int32, [3]> var_5495_shape_cast_fp16 = shape(x = linear_200_cast_fp16)[name = string("op_5495_shape_cast_fp16")];
+            int32 gather_302_axis_0 = const()[name = string("gather_302_axis_0"), val = int32(0)];
+            int32 gather_302_batch_dims_0 = const()[name = string("gather_302_batch_dims_0"), val = int32(0)];
+            bool gather_302_validate_indices_0 = const()[name = string("gather_302_validate_indices_0"), val = bool(false)];
+            string var_5495_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5495_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_302_to_uint16 = const()[name = string("select_302_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5495_shape_cast_fp16_to_uint16 = cast(dtype = var_5495_shape_cast_fp16_to_uint16_dtype_0, x = var_5495_shape_cast_fp16)[name = string("cast_340")];
+            uint16 gather_302_cast_uint16 = gather(axis = gather_302_axis_0, batch_dims = gather_302_batch_dims_0, indices = select_302_to_uint16, validate_indices = gather_302_validate_indices_0, x = var_5495_shape_cast_fp16_to_uint16)[name = string("gather_302_cast_uint16")];
+            string gather_302_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_302_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_302_cast_uint16_to_int32 = cast(dtype = gather_302_cast_uint16_to_int32_dtype_0, x = gather_302_cast_uint16)[name = string("cast_339")];
+            int32 end_step_53 = add(x = offset, y = gather_302_cast_uint16_to_int32)[name = string("end_step_53")];
+            tensor<int32, [1]> expand_dims_400 = const()[name = string("expand_dims_400"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_402 = const()[name = string("expand_dims_402"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_403_axes_0 = const()[name = string("expand_dims_403_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_403 = expand_dims(axes = expand_dims_403_axes_0, x = end_step_53)[name = string("expand_dims_403")];
+            tensor<int32, [1]> concat_554_values0_0 = const()[name = string("concat_554_values0_0"), val = tensor<int32, [1]>([25])];
+            int32 concat_554_axis_0 = const()[name = string("concat_554_axis_0"), val = int32(0)];
+            bool concat_554_interleave_0 = const()[name = string("concat_554_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_554 = concat(axis = concat_554_axis_0, interleave = concat_554_interleave_0, values = (concat_554_values0_0, expand_dims_400, expand_dims_1, expand_dims_402))[name = string("concat_554")];
+            tensor<int32, [1]> concat_555_values0_0 = const()[name = string("concat_555_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_555_values1_0 = const()[name = string("concat_555_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_555_values3_0 = const()[name = string("concat_555_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_555_axis_0 = const()[name = string("concat_555_axis_0"), val = int32(0)];
+            bool concat_555_interleave_0 = const()[name = string("concat_555_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_555 = concat(axis = concat_555_axis_0, interleave = concat_555_interleave_0, values = (concat_555_values0_0, concat_555_values1_0, expand_dims_403, concat_555_values3_0))[name = string("concat_555")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_26_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = k_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = k_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_26_stride_0, update = linear_201_cast_fp16, x = coreml_update_state_112)[name = string("k_cache1_internal_tensor_assign_26_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_26_cast_fp16, input = k_cache1)[name = string("coreml_update_state_114_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_114 = read_state(input = k_cache1)[name = string("coreml_update_state_114")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_26_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = v_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = v_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_26_stride_0, update = linear_202_cast_fp16, x = coreml_update_state_113)[name = string("v_cache1_internal_tensor_assign_26_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_26_cast_fp16, input = v_cache1)[name = string("coreml_update_state_115_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_115 = read_state(input = v_cache1)[name = string("coreml_update_state_115")];
+            int32 concat_560_values0_0 = const()[name = string("concat_560_values0_0"), val = int32(1)];
+            int32 concat_560_values2_0 = const()[name = string("concat_560_values2_0"), val = int32(1280)];
+            int32 concat_560_axis_0 = const()[name = string("concat_560_axis_0"), val = int32(0)];
+            bool concat_560_interleave_0 = const()[name = string("concat_560_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_560 = concat(axis = concat_560_axis_0, interleave = concat_560_interleave_0, values = (concat_560_values0_0, end_step_53, concat_560_values2_0))[name = string("concat_560")];
+            tensor<int32, [3]> var_5511_begin_0 = const()[name = string("op_5511_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5511_end_mask_0 = const()[name = string("op_5511_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5511_cast_fp16 = slice_by_index(begin = var_5511_begin_0, end = concat_560, end_mask = var_5511_end_mask_0, x = k_cache_101_cast_fp16)[name = string("op_5511_cast_fp16")];
+            tensor<int32, [3]> var_5514_begin_0 = const()[name = string("op_5514_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5514_end_mask_0 = const()[name = string("op_5514_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5514_cast_fp16 = slice_by_index(begin = var_5514_begin_0, end = concat_560, end_mask = var_5514_end_mask_0, x = v_cache_101_cast_fp16)[name = string("op_5514_cast_fp16")];
+            tensor<int32, [4]> concat_562x = const()[name = string("concat_562x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5524_cast_fp16 = reshape(shape = concat_562x, x = linear_200_cast_fp16)[name = string("op_5524_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_203_cast_fp16 = mul(x = var_5524_cast_fp16, y = const_260_to_fp16)[name = string("q_203_cast_fp16")];
+            tensor<int32, [4]> concat_563x = const()[name = string("concat_563x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5531_cast_fp16 = reshape(shape = concat_563x, x = var_5511_cast_fp16)[name = string("op_5531_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_255_cast_fp16 = mul(x = var_5531_cast_fp16, y = const_261_to_fp16)[name = string("k_255_cast_fp16")];
+            tensor<int32, [4]> concat_564x = const()[name = string("concat_564x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5538_cast_fp16 = reshape(shape = concat_564x, x = var_5514_cast_fp16)[name = string("op_5538_cast_fp16")];
+            tensor<int32, [4]> var_5539 = const()[name = string("op_5539"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_151_transpose_x_0 = const()[name = string("qk_151_transpose_x_0"), val = bool(false)];
+            bool qk_151_transpose_y_0 = const()[name = string("qk_151_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_357_perm_0 = const()[name = string("transpose_357_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_358_perm_0 = const()[name = string("transpose_358_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_358 = transpose(perm = transpose_358_perm_0, x = k_255_cast_fp16)[name = string("transpose_438")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_357 = transpose(perm = transpose_357_perm_0, x = q_203_cast_fp16)[name = string("transpose_439")];
+            tensor<fp16, [1, 20, ?, ?]> qk_151_cast_fp16 = matmul(transpose_x = qk_151_transpose_x_0, transpose_y = qk_151_transpose_y_0, x = transpose_357, y = transpose_358)[name = string("qk_151_cast_fp16")];
+            int32 concat_565_values1_0 = const()[name = string("concat_565_values1_0"), val = int32(448)];
+            int32 concat_565_axis_0 = const()[name = string("concat_565_axis_0"), val = int32(0)];
+            bool concat_565_interleave_0 = const()[name = string("concat_565_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_565 = concat(axis = concat_565_axis_0, interleave = concat_565_interleave_0, values = (gather_302_cast_uint16_to_int32, concat_565_values1_0))[name = string("concat_565")];
+            tensor<int32, [2]> var_5542_begin_0 = const()[name = string("op_5542_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5542_end_mask_0 = const()[name = string("op_5542_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5542_cast_fp16 = slice_by_index(begin = var_5542_begin_0, end = concat_565, end_mask = var_5542_end_mask_0, x = mask_to_fp16)[name = string("op_5542_cast_fp16")];
+            int32 concat_566_values0_0 = const()[name = string("concat_566_values0_0"), val = int32(0)];
+            int32 concat_566_axis_0 = const()[name = string("concat_566_axis_0"), val = int32(0)];
+            bool concat_566_interleave_0 = const()[name = string("concat_566_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_566 = concat(axis = concat_566_axis_0, interleave = concat_566_interleave_0, values = (concat_566_values0_0, gather_302_cast_uint16_to_int32))[name = string("concat_566")];
+            tensor<int32, [2]> var_5543_begin_0 = const()[name = string("op_5543_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5543_end_mask_0 = const()[name = string("op_5543_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5543_cast_fp16 = slice_by_index(begin = var_5543_begin_0, end = concat_566, end_mask = var_5543_end_mask_0, x = var_5542_cast_fp16)[name = string("op_5543_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_153_cast_fp16 = add(x = qk_151_cast_fp16, y = var_5543_cast_fp16)[name = string("qk_153_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5546_cast_fp16 = softmax(axis = var_5455, x = qk_153_cast_fp16)[name = string("op_5546_cast_fp16")];
+            bool var_5548_transpose_x_0 = const()[name = string("op_5548_transpose_x_0"), val = bool(false)];
+            bool var_5548_transpose_y_0 = const()[name = string("op_5548_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_255_cast_fp16 = transpose(perm = var_5539, x = var_5538_cast_fp16)[name = string("transpose_440")];
+            tensor<fp16, [1, 20, ?, 64]> var_5548_cast_fp16 = matmul(transpose_x = var_5548_transpose_x_0, transpose_y = var_5548_transpose_y_0, x = var_5546_cast_fp16, y = v_255_cast_fp16)[name = string("op_5548_cast_fp16")];
+            tensor<int32, [4]> var_5549 = const()[name = string("op_5549"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_567x = const()[name = string("concat_567x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5550_cast_fp16 = transpose(perm = var_5549, x = var_5548_cast_fp16)[name = string("transpose_437")];
+            tensor<fp16, [1, ?, 1280]> x_457_cast_fp16 = reshape(shape = concat_567x, x = var_5550_cast_fp16)[name = string("x_457_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5554_to_fp16 = const()[name = string("op_5554_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295946880)))];
+            tensor<fp16, [1280]> var_5555_to_fp16 = const()[name = string("op_5555_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299223744)))];
+            tensor<fp16, [1, ?, 1280]> linear_203_cast_fp16 = linear(bias = var_5555_to_fp16, weight = var_5554_to_fp16, x = x_457_cast_fp16)[name = string("linear_203_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_459_cast_fp16 = add(x = x_453_cast_fp16, y = linear_203_cast_fp16)[name = string("x_459_cast_fp16")];
+            tensor<int32, [1]> var_5562_axes_0 = const()[name = string("op_5562_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299226368)))];
+            tensor<fp16, [1280]> blocks_25_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299228992)))];
+            tensor<fp16, [1, ?, 1280]> var_5562_cast_fp16 = layer_norm(axes = var_5562_axes_0, beta = blocks_25_cross_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_cross_attn_ln_weight_to_fp16, x = x_459_cast_fp16)[name = string("op_5562_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5571_to_fp16 = const()[name = string("op_5571_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299231616)))];
+            tensor<fp16, [1280]> var_5572_to_fp16 = const()[name = string("op_5572_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302508480)))];
+            tensor<fp16, [1, ?, 1280]> linear_204_cast_fp16 = linear(bias = var_5572_to_fp16, weight = var_5571_to_fp16, x = var_5562_cast_fp16)[name = string("linear_204_cast_fp16")];
+            tensor<int32, [3]> concat_568 = const()[name = string("concat_568"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_569 = const()[name = string("concat_569"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_257_internal_tensor_assign_1_stride_0 = const()[name = string("k_257_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_568, begin_mask = k_257_internal_tensor_assign_1_begin_mask_0, end = concat_569, end_mask = k_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_257_internal_tensor_assign_1_squeeze_mask_0, stride = k_257_internal_tensor_assign_1_stride_0, update = k_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("k_257_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_570 = const()[name = string("concat_570"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_571 = const()[name = string("concat_571"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_257_internal_tensor_assign_1_stride_0 = const()[name = string("v_257_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_570, begin_mask = v_257_internal_tensor_assign_1_begin_mask_0, end = concat_571, end_mask = v_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_257_internal_tensor_assign_1_squeeze_mask_0, stride = v_257_internal_tensor_assign_1_stride_0, update = v_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("v_257_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_572x = const()[name = string("concat_572x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5592_cast_fp16 = reshape(shape = concat_572x, x = linear_204_cast_fp16)[name = string("op_5592_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_207_cast_fp16 = mul(x = var_5592_cast_fp16, y = const_262_to_fp16)[name = string("q_207_cast_fp16")];
+            tensor<int32, [4]> var_5598 = const()[name = string("op_5598"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5599_cast_fp16 = reshape(shape = var_5598, x = k_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5599_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_259_cast_fp16 = mul(x = var_5599_cast_fp16, y = const_263_to_fp16)[name = string("k_259_cast_fp16")];
+            tensor<int32, [4]> var_5605 = const()[name = string("op_5605"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5606_cast_fp16 = reshape(shape = var_5605, x = v_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5606_cast_fp16")];
+            tensor<int32, [4]> var_5607 = const()[name = string("op_5607"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_155_transpose_x_0 = const()[name = string("qk_155_transpose_x_0"), val = bool(false)];
+            bool qk_155_transpose_y_0 = const()[name = string("qk_155_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_359_perm_0 = const()[name = string("transpose_359_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_360_perm_0 = const()[name = string("transpose_360_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_360 = transpose(perm = transpose_360_perm_0, x = k_259_cast_fp16)[name = string("transpose_434")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_359 = transpose(perm = transpose_359_perm_0, x = q_207_cast_fp16)[name = string("transpose_435")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_155_cast_fp16 = matmul(transpose_x = qk_155_transpose_x_0, transpose_y = qk_155_transpose_y_0, x = transpose_359, y = transpose_360)[name = string("qk_155_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_5611_cast_fp16 = softmax(axis = var_5455, x = qk_155_cast_fp16)[name = string("op_5611_cast_fp16")];
+            bool var_5613_transpose_x_0 = const()[name = string("op_5613_transpose_x_0"), val = bool(false)];
+            bool var_5613_transpose_y_0 = const()[name = string("op_5613_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_259_cast_fp16 = transpose(perm = var_5607, x = var_5606_cast_fp16)[name = string("transpose_436")];
+            tensor<fp16, [1, 20, ?, 64]> var_5613_cast_fp16 = matmul(transpose_x = var_5613_transpose_x_0, transpose_y = var_5613_transpose_y_0, x = var_5611_cast_fp16, y = v_259_cast_fp16)[name = string("op_5613_cast_fp16")];
+            tensor<int32, [4]> var_5614 = const()[name = string("op_5614"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_573x = const()[name = string("concat_573x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5615_cast_fp16 = transpose(perm = var_5614, x = var_5613_cast_fp16)[name = string("transpose_433")];
+            tensor<fp16, [1, ?, 1280]> x_463_cast_fp16 = reshape(shape = concat_573x, x = var_5615_cast_fp16)[name = string("x_463_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5619_to_fp16 = const()[name = string("op_5619_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302511104)))];
+            tensor<fp16, [1280]> var_5620_to_fp16 = const()[name = string("op_5620_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305787968)))];
+            tensor<fp16, [1, ?, 1280]> linear_205_cast_fp16 = linear(bias = var_5620_to_fp16, weight = var_5619_to_fp16, x = x_463_cast_fp16)[name = string("linear_205_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_465_cast_fp16 = add(x = x_459_cast_fp16, y = linear_205_cast_fp16)[name = string("x_465_cast_fp16")];
+            tensor<int32, [1]> var_5627_axes_0 = const()[name = string("op_5627_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_mlp_ln_weight_to_fp16 = const()[name = string("blocks_25_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305790592)))];
+            tensor<fp16, [1280]> blocks_25_mlp_ln_bias_to_fp16 = const()[name = string("blocks_25_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305793216)))];
+            tensor<fp16, [1, ?, 1280]> var_5627_cast_fp16 = layer_norm(axes = var_5627_axes_0, beta = blocks_25_mlp_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_mlp_ln_weight_to_fp16, x = x_465_cast_fp16)[name = string("op_5627_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5636_to_fp16 = const()[name = string("op_5636_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305795840)))];
+            tensor<fp16, [5120]> var_5637_to_fp16 = const()[name = string("op_5637_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318903104)))];
+            tensor<fp16, [1, ?, 5120]> linear_206_cast_fp16 = linear(bias = var_5637_to_fp16, weight = var_5636_to_fp16, x = var_5627_cast_fp16)[name = string("linear_206_cast_fp16")];
+            string x_469_mode_0 = const()[name = string("x_469_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_469_cast_fp16 = gelu(mode = x_469_mode_0, x = linear_206_cast_fp16)[name = string("x_469_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5642_to_fp16 = const()[name = string("op_5642_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318913408)))];
+            tensor<fp16, [1280]> var_5643_to_fp16 = const()[name = string("op_5643_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332020672)))];
+            tensor<fp16, [1, ?, 1280]> linear_207_cast_fp16 = linear(bias = var_5643_to_fp16, weight = var_5642_to_fp16, x = x_469_cast_fp16)[name = string("linear_207_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_471_cast_fp16 = add(x = x_465_cast_fp16, y = linear_207_cast_fp16)[name = string("x_471_cast_fp16")];
+            tensor<int32, [4]> k_cache_105_begin_0 = const()[name = string("k_cache_105_begin_0"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_105_end_0 = const()[name = string("k_cache_105_end_0"), val = tensor<int32, [4]>([27, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_105_end_mask_0 = const()[name = string("k_cache_105_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_105_squeeze_mask_0 = const()[name = string("k_cache_105_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_105_cast_fp16 = slice_by_index(begin = k_cache_105_begin_0, end = k_cache_105_end_0, end_mask = k_cache_105_end_mask_0, squeeze_mask = k_cache_105_squeeze_mask_0, x = coreml_update_state_114)[name = string("k_cache_105_cast_fp16")];
+            tensor<int32, [4]> v_cache_105_begin_0 = const()[name = string("v_cache_105_begin_0"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_105_end_0 = const()[name = string("v_cache_105_end_0"), val = tensor<int32, [4]>([27, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_105_end_mask_0 = const()[name = string("v_cache_105_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_105_squeeze_mask_0 = const()[name = string("v_cache_105_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_105_cast_fp16 = slice_by_index(begin = v_cache_105_begin_0, end = v_cache_105_end_0, end_mask = v_cache_105_end_mask_0, squeeze_mask = v_cache_105_squeeze_mask_0, x = coreml_update_state_115)[name = string("v_cache_105_cast_fp16")];
+            tensor<int32, [4]> k_cache_107_begin_0 = const()[name = string("k_cache_107_begin_0"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_107_end_0 = const()[name = string("k_cache_107_end_0"), val = tensor<int32, [4]>([27, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_107_end_mask_0 = const()[name = string("k_cache_107_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_107_squeeze_mask_0 = const()[name = string("k_cache_107_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_107_cast_fp16 = slice_by_index(begin = k_cache_107_begin_0, end = k_cache_107_end_0, end_mask = k_cache_107_end_mask_0, squeeze_mask = k_cache_107_squeeze_mask_0, x = read_state_2)[name = string("k_cache_107_cast_fp16")];
+            tensor<int32, [4]> v_cache_107_begin_0 = const()[name = string("v_cache_107_begin_0"), val = tensor<int32, [4]>([26, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_107_end_0 = const()[name = string("v_cache_107_end_0"), val = tensor<int32, [4]>([27, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_107_end_mask_0 = const()[name = string("v_cache_107_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_107_squeeze_mask_0 = const()[name = string("v_cache_107_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_107_cast_fp16 = slice_by_index(begin = v_cache_107_begin_0, end = v_cache_107_end_0, end_mask = v_cache_107_end_mask_0, squeeze_mask = v_cache_107_squeeze_mask_0, x = read_state_3)[name = string("v_cache_107_cast_fp16")];
+            int32 var_5666 = const()[name = string("op_5666"), val = int32(-1)];
+            tensor<int32, [1]> var_5684_axes_0 = const()[name = string("op_5684_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332023296)))];
+            tensor<fp16, [1280]> blocks_26_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332025920)))];
+            fp16 var_5672_to_fp16 = const()[name = string("op_5672_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5684_cast_fp16 = layer_norm(axes = var_5684_axes_0, beta = blocks_26_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_attn_ln_weight_to_fp16, x = x_471_cast_fp16)[name = string("op_5684_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5695_to_fp16 = const()[name = string("op_5695_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332028544)))];
+            tensor<fp16, [1280]> var_5696_to_fp16 = const()[name = string("op_5696_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335305408)))];
+            tensor<fp16, [1, ?, 1280]> linear_208_cast_fp16 = linear(bias = var_5696_to_fp16, weight = var_5695_to_fp16, x = var_5684_cast_fp16)[name = string("linear_208_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5699_to_fp16 = const()[name = string("op_5699_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335308032)))];
+            tensor<fp16, [1, ?, 1280]> linear_209_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5699_to_fp16, x = var_5684_cast_fp16)[name = string("linear_209_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5703_to_fp16 = const()[name = string("op_5703_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1338584896)))];
+            tensor<fp16, [1280]> var_5704_to_fp16 = const()[name = string("op_5704_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341861760)))];
+            tensor<fp16, [1, ?, 1280]> linear_210_cast_fp16 = linear(bias = var_5704_to_fp16, weight = var_5703_to_fp16, x = var_5684_cast_fp16)[name = string("linear_210_cast_fp16")];
+            tensor<int32, [3]> var_5706_shape_cast_fp16 = shape(x = linear_208_cast_fp16)[name = string("op_5706_shape_cast_fp16")];
+            int32 gather_314_axis_0 = const()[name = string("gather_314_axis_0"), val = int32(0)];
+            int32 gather_314_batch_dims_0 = const()[name = string("gather_314_batch_dims_0"), val = int32(0)];
+            bool gather_314_validate_indices_0 = const()[name = string("gather_314_validate_indices_0"), val = bool(false)];
+            string var_5706_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5706_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_314_to_uint16 = const()[name = string("select_314_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5706_shape_cast_fp16_to_uint16 = cast(dtype = var_5706_shape_cast_fp16_to_uint16_dtype_0, x = var_5706_shape_cast_fp16)[name = string("cast_338")];
+            uint16 gather_314_cast_uint16 = gather(axis = gather_314_axis_0, batch_dims = gather_314_batch_dims_0, indices = select_314_to_uint16, validate_indices = gather_314_validate_indices_0, x = var_5706_shape_cast_fp16_to_uint16)[name = string("gather_314_cast_uint16")];
+            string gather_314_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_314_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_314_cast_uint16_to_int32 = cast(dtype = gather_314_cast_uint16_to_int32_dtype_0, x = gather_314_cast_uint16)[name = string("cast_337")];
+            int32 end_step_55 = add(x = offset, y = gather_314_cast_uint16_to_int32)[name = string("end_step_55")];
+            tensor<int32, [1]> expand_dims_416 = const()[name = string("expand_dims_416"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_418 = const()[name = string("expand_dims_418"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_419_axes_0 = const()[name = string("expand_dims_419_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_419 = expand_dims(axes = expand_dims_419_axes_0, x = end_step_55)[name = string("expand_dims_419")];
+            tensor<int32, [1]> concat_576_values0_0 = const()[name = string("concat_576_values0_0"), val = tensor<int32, [1]>([26])];
+            int32 concat_576_axis_0 = const()[name = string("concat_576_axis_0"), val = int32(0)];
+            bool concat_576_interleave_0 = const()[name = string("concat_576_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_576 = concat(axis = concat_576_axis_0, interleave = concat_576_interleave_0, values = (concat_576_values0_0, expand_dims_416, expand_dims_1, expand_dims_418))[name = string("concat_576")];
+            tensor<int32, [1]> concat_577_values0_0 = const()[name = string("concat_577_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_577_values1_0 = const()[name = string("concat_577_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_577_values3_0 = const()[name = string("concat_577_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_577_axis_0 = const()[name = string("concat_577_axis_0"), val = int32(0)];
+            bool concat_577_interleave_0 = const()[name = string("concat_577_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_577 = concat(axis = concat_577_axis_0, interleave = concat_577_interleave_0, values = (concat_577_values0_0, concat_577_values1_0, expand_dims_419, concat_577_values3_0))[name = string("concat_577")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_27_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = k_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = k_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_27_stride_0, update = linear_209_cast_fp16, x = coreml_update_state_114)[name = string("k_cache1_internal_tensor_assign_27_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_27_cast_fp16, input = k_cache1)[name = string("coreml_update_state_116_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_116 = read_state(input = k_cache1)[name = string("coreml_update_state_116")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_27_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = v_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = v_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_27_stride_0, update = linear_210_cast_fp16, x = coreml_update_state_115)[name = string("v_cache1_internal_tensor_assign_27_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_27_cast_fp16, input = v_cache1)[name = string("coreml_update_state_117_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_117 = read_state(input = v_cache1)[name = string("coreml_update_state_117")];
+            int32 concat_582_values0_0 = const()[name = string("concat_582_values0_0"), val = int32(1)];
+            int32 concat_582_values2_0 = const()[name = string("concat_582_values2_0"), val = int32(1280)];
+            int32 concat_582_axis_0 = const()[name = string("concat_582_axis_0"), val = int32(0)];
+            bool concat_582_interleave_0 = const()[name = string("concat_582_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_582 = concat(axis = concat_582_axis_0, interleave = concat_582_interleave_0, values = (concat_582_values0_0, end_step_55, concat_582_values2_0))[name = string("concat_582")];
+            tensor<int32, [3]> var_5722_begin_0 = const()[name = string("op_5722_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5722_end_mask_0 = const()[name = string("op_5722_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5722_cast_fp16 = slice_by_index(begin = var_5722_begin_0, end = concat_582, end_mask = var_5722_end_mask_0, x = k_cache_105_cast_fp16)[name = string("op_5722_cast_fp16")];
+            tensor<int32, [3]> var_5725_begin_0 = const()[name = string("op_5725_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5725_end_mask_0 = const()[name = string("op_5725_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5725_cast_fp16 = slice_by_index(begin = var_5725_begin_0, end = concat_582, end_mask = var_5725_end_mask_0, x = v_cache_105_cast_fp16)[name = string("op_5725_cast_fp16")];
+            tensor<int32, [4]> concat_584x = const()[name = string("concat_584x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5735_cast_fp16 = reshape(shape = concat_584x, x = linear_208_cast_fp16)[name = string("op_5735_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_264_to_fp16 = const()[name = string("const_264_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_211_cast_fp16 = mul(x = var_5735_cast_fp16, y = const_264_to_fp16)[name = string("q_211_cast_fp16")];
+            tensor<int32, [4]> concat_585x = const()[name = string("concat_585x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5742_cast_fp16 = reshape(shape = concat_585x, x = var_5722_cast_fp16)[name = string("op_5742_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_265_cast_fp16 = mul(x = var_5742_cast_fp16, y = const_265_to_fp16)[name = string("k_265_cast_fp16")];
+            tensor<int32, [4]> concat_586x = const()[name = string("concat_586x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5749_cast_fp16 = reshape(shape = concat_586x, x = var_5725_cast_fp16)[name = string("op_5749_cast_fp16")];
+            tensor<int32, [4]> var_5750 = const()[name = string("op_5750"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_157_transpose_x_0 = const()[name = string("qk_157_transpose_x_0"), val = bool(false)];
+            bool qk_157_transpose_y_0 = const()[name = string("qk_157_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_361_perm_0 = const()[name = string("transpose_361_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_362_perm_0 = const()[name = string("transpose_362_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_362 = transpose(perm = transpose_362_perm_0, x = k_265_cast_fp16)[name = string("transpose_430")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_361 = transpose(perm = transpose_361_perm_0, x = q_211_cast_fp16)[name = string("transpose_431")];
+            tensor<fp16, [1, 20, ?, ?]> qk_157_cast_fp16 = matmul(transpose_x = qk_157_transpose_x_0, transpose_y = qk_157_transpose_y_0, x = transpose_361, y = transpose_362)[name = string("qk_157_cast_fp16")];
+            int32 concat_587_values1_0 = const()[name = string("concat_587_values1_0"), val = int32(448)];
+            int32 concat_587_axis_0 = const()[name = string("concat_587_axis_0"), val = int32(0)];
+            bool concat_587_interleave_0 = const()[name = string("concat_587_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_587 = concat(axis = concat_587_axis_0, interleave = concat_587_interleave_0, values = (gather_314_cast_uint16_to_int32, concat_587_values1_0))[name = string("concat_587")];
+            tensor<int32, [2]> var_5753_begin_0 = const()[name = string("op_5753_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5753_end_mask_0 = const()[name = string("op_5753_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5753_cast_fp16 = slice_by_index(begin = var_5753_begin_0, end = concat_587, end_mask = var_5753_end_mask_0, x = mask_to_fp16)[name = string("op_5753_cast_fp16")];
+            int32 concat_588_values0_0 = const()[name = string("concat_588_values0_0"), val = int32(0)];
+            int32 concat_588_axis_0 = const()[name = string("concat_588_axis_0"), val = int32(0)];
+            bool concat_588_interleave_0 = const()[name = string("concat_588_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_588 = concat(axis = concat_588_axis_0, interleave = concat_588_interleave_0, values = (concat_588_values0_0, gather_314_cast_uint16_to_int32))[name = string("concat_588")];
+            tensor<int32, [2]> var_5754_begin_0 = const()[name = string("op_5754_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5754_end_mask_0 = const()[name = string("op_5754_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5754_cast_fp16 = slice_by_index(begin = var_5754_begin_0, end = concat_588, end_mask = var_5754_end_mask_0, x = var_5753_cast_fp16)[name = string("op_5754_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_159_cast_fp16 = add(x = qk_157_cast_fp16, y = var_5754_cast_fp16)[name = string("qk_159_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5757_cast_fp16 = softmax(axis = var_5666, x = qk_159_cast_fp16)[name = string("op_5757_cast_fp16")];
+            bool var_5759_transpose_x_0 = const()[name = string("op_5759_transpose_x_0"), val = bool(false)];
+            bool var_5759_transpose_y_0 = const()[name = string("op_5759_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_265_cast_fp16 = transpose(perm = var_5750, x = var_5749_cast_fp16)[name = string("transpose_432")];
+            tensor<fp16, [1, 20, ?, 64]> var_5759_cast_fp16 = matmul(transpose_x = var_5759_transpose_x_0, transpose_y = var_5759_transpose_y_0, x = var_5757_cast_fp16, y = v_265_cast_fp16)[name = string("op_5759_cast_fp16")];
+            tensor<int32, [4]> var_5760 = const()[name = string("op_5760"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_589x = const()[name = string("concat_589x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5761_cast_fp16 = transpose(perm = var_5760, x = var_5759_cast_fp16)[name = string("transpose_429")];
+            tensor<fp16, [1, ?, 1280]> x_475_cast_fp16 = reshape(shape = concat_589x, x = var_5761_cast_fp16)[name = string("x_475_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5765_to_fp16 = const()[name = string("op_5765_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341864384)))];
+            tensor<fp16, [1280]> var_5766_to_fp16 = const()[name = string("op_5766_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345141248)))];
+            tensor<fp16, [1, ?, 1280]> linear_211_cast_fp16 = linear(bias = var_5766_to_fp16, weight = var_5765_to_fp16, x = x_475_cast_fp16)[name = string("linear_211_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_477_cast_fp16 = add(x = x_471_cast_fp16, y = linear_211_cast_fp16)[name = string("x_477_cast_fp16")];
+            tensor<int32, [1]> var_5773_axes_0 = const()[name = string("op_5773_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345143872)))];
+            tensor<fp16, [1280]> blocks_26_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345146496)))];
+            tensor<fp16, [1, ?, 1280]> var_5773_cast_fp16 = layer_norm(axes = var_5773_axes_0, beta = blocks_26_cross_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_cross_attn_ln_weight_to_fp16, x = x_477_cast_fp16)[name = string("op_5773_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5782_to_fp16 = const()[name = string("op_5782_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345149120)))];
+            tensor<fp16, [1280]> var_5783_to_fp16 = const()[name = string("op_5783_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348425984)))];
+            tensor<fp16, [1, ?, 1280]> linear_212_cast_fp16 = linear(bias = var_5783_to_fp16, weight = var_5782_to_fp16, x = var_5773_cast_fp16)[name = string("linear_212_cast_fp16")];
+            tensor<int32, [3]> concat_590 = const()[name = string("concat_590"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_591 = const()[name = string("concat_591"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_267_internal_tensor_assign_1_stride_0 = const()[name = string("k_267_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_590, begin_mask = k_267_internal_tensor_assign_1_begin_mask_0, end = concat_591, end_mask = k_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_267_internal_tensor_assign_1_squeeze_mask_0, stride = k_267_internal_tensor_assign_1_stride_0, update = k_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("k_267_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_592 = const()[name = string("concat_592"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_593 = const()[name = string("concat_593"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_267_internal_tensor_assign_1_stride_0 = const()[name = string("v_267_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_592, begin_mask = v_267_internal_tensor_assign_1_begin_mask_0, end = concat_593, end_mask = v_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_267_internal_tensor_assign_1_squeeze_mask_0, stride = v_267_internal_tensor_assign_1_stride_0, update = v_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("v_267_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_594x = const()[name = string("concat_594x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5803_cast_fp16 = reshape(shape = concat_594x, x = linear_212_cast_fp16)[name = string("op_5803_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_266_to_fp16 = const()[name = string("const_266_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_215_cast_fp16 = mul(x = var_5803_cast_fp16, y = const_266_to_fp16)[name = string("q_215_cast_fp16")];
+            tensor<int32, [4]> var_5809 = const()[name = string("op_5809"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5810_cast_fp16 = reshape(shape = var_5809, x = k_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5810_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_269_cast_fp16 = mul(x = var_5810_cast_fp16, y = const_267_to_fp16)[name = string("k_269_cast_fp16")];
+            tensor<int32, [4]> var_5816 = const()[name = string("op_5816"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_5817_cast_fp16 = reshape(shape = var_5816, x = v_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5817_cast_fp16")];
+            tensor<int32, [4]> var_5818 = const()[name = string("op_5818"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_161_transpose_x_0 = const()[name = string("qk_161_transpose_x_0"), val = bool(false)];
+            bool qk_161_transpose_y_0 = const()[name = string("qk_161_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_363_perm_0 = const()[name = string("transpose_363_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_364_perm_0 = const()[name = string("transpose_364_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_364 = transpose(perm = transpose_364_perm_0, x = k_269_cast_fp16)[name = string("transpose_426")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_363 = transpose(perm = transpose_363_perm_0, x = q_215_cast_fp16)[name = string("transpose_427")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_161_cast_fp16 = matmul(transpose_x = qk_161_transpose_x_0, transpose_y = qk_161_transpose_y_0, x = transpose_363, y = transpose_364)[name = string("qk_161_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_5822_cast_fp16 = softmax(axis = var_5666, x = qk_161_cast_fp16)[name = string("op_5822_cast_fp16")];
+            bool var_5824_transpose_x_0 = const()[name = string("op_5824_transpose_x_0"), val = bool(false)];
+            bool var_5824_transpose_y_0 = const()[name = string("op_5824_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_269_cast_fp16 = transpose(perm = var_5818, x = var_5817_cast_fp16)[name = string("transpose_428")];
+            tensor<fp16, [1, 20, ?, 64]> var_5824_cast_fp16 = matmul(transpose_x = var_5824_transpose_x_0, transpose_y = var_5824_transpose_y_0, x = var_5822_cast_fp16, y = v_269_cast_fp16)[name = string("op_5824_cast_fp16")];
+            tensor<int32, [4]> var_5825 = const()[name = string("op_5825"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_595x = const()[name = string("concat_595x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5826_cast_fp16 = transpose(perm = var_5825, x = var_5824_cast_fp16)[name = string("transpose_425")];
+            tensor<fp16, [1, ?, 1280]> x_481_cast_fp16 = reshape(shape = concat_595x, x = var_5826_cast_fp16)[name = string("x_481_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5830_to_fp16 = const()[name = string("op_5830_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348428608)))];
+            tensor<fp16, [1280]> var_5831_to_fp16 = const()[name = string("op_5831_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351705472)))];
+            tensor<fp16, [1, ?, 1280]> linear_213_cast_fp16 = linear(bias = var_5831_to_fp16, weight = var_5830_to_fp16, x = x_481_cast_fp16)[name = string("linear_213_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_483_cast_fp16 = add(x = x_477_cast_fp16, y = linear_213_cast_fp16)[name = string("x_483_cast_fp16")];
+            tensor<int32, [1]> var_5838_axes_0 = const()[name = string("op_5838_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_mlp_ln_weight_to_fp16 = const()[name = string("blocks_26_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351708096)))];
+            tensor<fp16, [1280]> blocks_26_mlp_ln_bias_to_fp16 = const()[name = string("blocks_26_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351710720)))];
+            tensor<fp16, [1, ?, 1280]> var_5838_cast_fp16 = layer_norm(axes = var_5838_axes_0, beta = blocks_26_mlp_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_mlp_ln_weight_to_fp16, x = x_483_cast_fp16)[name = string("op_5838_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_5847_to_fp16 = const()[name = string("op_5847_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351713344)))];
+            tensor<fp16, [5120]> var_5848_to_fp16 = const()[name = string("op_5848_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364820608)))];
+            tensor<fp16, [1, ?, 5120]> linear_214_cast_fp16 = linear(bias = var_5848_to_fp16, weight = var_5847_to_fp16, x = var_5838_cast_fp16)[name = string("linear_214_cast_fp16")];
+            string x_487_mode_0 = const()[name = string("x_487_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_487_cast_fp16 = gelu(mode = x_487_mode_0, x = linear_214_cast_fp16)[name = string("x_487_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_5853_to_fp16 = const()[name = string("op_5853_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364830912)))];
+            tensor<fp16, [1280]> var_5854_to_fp16 = const()[name = string("op_5854_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377938176)))];
+            tensor<fp16, [1, ?, 1280]> linear_215_cast_fp16 = linear(bias = var_5854_to_fp16, weight = var_5853_to_fp16, x = x_487_cast_fp16)[name = string("linear_215_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_489_cast_fp16 = add(x = x_483_cast_fp16, y = linear_215_cast_fp16)[name = string("x_489_cast_fp16")];
+            tensor<int32, [4]> k_cache_109_begin_0 = const()[name = string("k_cache_109_begin_0"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_109_end_0 = const()[name = string("k_cache_109_end_0"), val = tensor<int32, [4]>([28, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_109_end_mask_0 = const()[name = string("k_cache_109_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_109_squeeze_mask_0 = const()[name = string("k_cache_109_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_109_cast_fp16 = slice_by_index(begin = k_cache_109_begin_0, end = k_cache_109_end_0, end_mask = k_cache_109_end_mask_0, squeeze_mask = k_cache_109_squeeze_mask_0, x = coreml_update_state_116)[name = string("k_cache_109_cast_fp16")];
+            tensor<int32, [4]> v_cache_109_begin_0 = const()[name = string("v_cache_109_begin_0"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_109_end_0 = const()[name = string("v_cache_109_end_0"), val = tensor<int32, [4]>([28, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_109_end_mask_0 = const()[name = string("v_cache_109_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_109_squeeze_mask_0 = const()[name = string("v_cache_109_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_109_cast_fp16 = slice_by_index(begin = v_cache_109_begin_0, end = v_cache_109_end_0, end_mask = v_cache_109_end_mask_0, squeeze_mask = v_cache_109_squeeze_mask_0, x = coreml_update_state_117)[name = string("v_cache_109_cast_fp16")];
+            tensor<int32, [4]> k_cache_111_begin_0 = const()[name = string("k_cache_111_begin_0"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_111_end_0 = const()[name = string("k_cache_111_end_0"), val = tensor<int32, [4]>([28, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_111_end_mask_0 = const()[name = string("k_cache_111_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_111_squeeze_mask_0 = const()[name = string("k_cache_111_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_111_cast_fp16 = slice_by_index(begin = k_cache_111_begin_0, end = k_cache_111_end_0, end_mask = k_cache_111_end_mask_0, squeeze_mask = k_cache_111_squeeze_mask_0, x = read_state_2)[name = string("k_cache_111_cast_fp16")];
+            tensor<int32, [4]> v_cache_111_begin_0 = const()[name = string("v_cache_111_begin_0"), val = tensor<int32, [4]>([27, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_111_end_0 = const()[name = string("v_cache_111_end_0"), val = tensor<int32, [4]>([28, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_111_end_mask_0 = const()[name = string("v_cache_111_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_111_squeeze_mask_0 = const()[name = string("v_cache_111_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_111_cast_fp16 = slice_by_index(begin = v_cache_111_begin_0, end = v_cache_111_end_0, end_mask = v_cache_111_end_mask_0, squeeze_mask = v_cache_111_squeeze_mask_0, x = read_state_3)[name = string("v_cache_111_cast_fp16")];
+            int32 var_5877 = const()[name = string("op_5877"), val = int32(-1)];
+            tensor<int32, [1]> var_5895_axes_0 = const()[name = string("op_5895_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377940800)))];
+            tensor<fp16, [1280]> blocks_27_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377943424)))];
+            fp16 var_5883_to_fp16 = const()[name = string("op_5883_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_5895_cast_fp16 = layer_norm(axes = var_5895_axes_0, beta = blocks_27_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_attn_ln_weight_to_fp16, x = x_489_cast_fp16)[name = string("op_5895_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5906_to_fp16 = const()[name = string("op_5906_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377946048)))];
+            tensor<fp16, [1280]> var_5907_to_fp16 = const()[name = string("op_5907_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381222912)))];
+            tensor<fp16, [1, ?, 1280]> linear_216_cast_fp16 = linear(bias = var_5907_to_fp16, weight = var_5906_to_fp16, x = var_5895_cast_fp16)[name = string("linear_216_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5910_to_fp16 = const()[name = string("op_5910_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381225536)))];
+            tensor<fp16, [1, ?, 1280]> linear_217_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5910_to_fp16, x = var_5895_cast_fp16)[name = string("linear_217_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5914_to_fp16 = const()[name = string("op_5914_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1384502400)))];
+            tensor<fp16, [1280]> var_5915_to_fp16 = const()[name = string("op_5915_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387779264)))];
+            tensor<fp16, [1, ?, 1280]> linear_218_cast_fp16 = linear(bias = var_5915_to_fp16, weight = var_5914_to_fp16, x = var_5895_cast_fp16)[name = string("linear_218_cast_fp16")];
+            tensor<int32, [3]> var_5917_shape_cast_fp16 = shape(x = linear_216_cast_fp16)[name = string("op_5917_shape_cast_fp16")];
+            int32 gather_326_axis_0 = const()[name = string("gather_326_axis_0"), val = int32(0)];
+            int32 gather_326_batch_dims_0 = const()[name = string("gather_326_batch_dims_0"), val = int32(0)];
+            bool gather_326_validate_indices_0 = const()[name = string("gather_326_validate_indices_0"), val = bool(false)];
+            string var_5917_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5917_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_326_to_uint16 = const()[name = string("select_326_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5917_shape_cast_fp16_to_uint16 = cast(dtype = var_5917_shape_cast_fp16_to_uint16_dtype_0, x = var_5917_shape_cast_fp16)[name = string("cast_336")];
+            uint16 gather_326_cast_uint16 = gather(axis = gather_326_axis_0, batch_dims = gather_326_batch_dims_0, indices = select_326_to_uint16, validate_indices = gather_326_validate_indices_0, x = var_5917_shape_cast_fp16_to_uint16)[name = string("gather_326_cast_uint16")];
+            string gather_326_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_326_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_326_cast_uint16_to_int32 = cast(dtype = gather_326_cast_uint16_to_int32_dtype_0, x = gather_326_cast_uint16)[name = string("cast_335")];
+            int32 end_step_57 = add(x = offset, y = gather_326_cast_uint16_to_int32)[name = string("end_step_57")];
+            tensor<int32, [1]> expand_dims_432 = const()[name = string("expand_dims_432"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_434 = const()[name = string("expand_dims_434"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_435_axes_0 = const()[name = string("expand_dims_435_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_435 = expand_dims(axes = expand_dims_435_axes_0, x = end_step_57)[name = string("expand_dims_435")];
+            tensor<int32, [1]> concat_598_values0_0 = const()[name = string("concat_598_values0_0"), val = tensor<int32, [1]>([27])];
+            int32 concat_598_axis_0 = const()[name = string("concat_598_axis_0"), val = int32(0)];
+            bool concat_598_interleave_0 = const()[name = string("concat_598_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_598 = concat(axis = concat_598_axis_0, interleave = concat_598_interleave_0, values = (concat_598_values0_0, expand_dims_432, expand_dims_1, expand_dims_434))[name = string("concat_598")];
+            tensor<int32, [1]> concat_599_values0_0 = const()[name = string("concat_599_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_599_values1_0 = const()[name = string("concat_599_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_599_values3_0 = const()[name = string("concat_599_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_599_axis_0 = const()[name = string("concat_599_axis_0"), val = int32(0)];
+            bool concat_599_interleave_0 = const()[name = string("concat_599_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_599 = concat(axis = concat_599_axis_0, interleave = concat_599_interleave_0, values = (concat_599_values0_0, concat_599_values1_0, expand_dims_435, concat_599_values3_0))[name = string("concat_599")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_28_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = k_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = k_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_28_stride_0, update = linear_217_cast_fp16, x = coreml_update_state_116)[name = string("k_cache1_internal_tensor_assign_28_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_28_cast_fp16, input = k_cache1)[name = string("coreml_update_state_118_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_118 = read_state(input = k_cache1)[name = string("coreml_update_state_118")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_28_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = v_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = v_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_28_stride_0, update = linear_218_cast_fp16, x = coreml_update_state_117)[name = string("v_cache1_internal_tensor_assign_28_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_28_cast_fp16, input = v_cache1)[name = string("coreml_update_state_119_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_119 = read_state(input = v_cache1)[name = string("coreml_update_state_119")];
+            int32 concat_604_values0_0 = const()[name = string("concat_604_values0_0"), val = int32(1)];
+            int32 concat_604_values2_0 = const()[name = string("concat_604_values2_0"), val = int32(1280)];
+            int32 concat_604_axis_0 = const()[name = string("concat_604_axis_0"), val = int32(0)];
+            bool concat_604_interleave_0 = const()[name = string("concat_604_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_604 = concat(axis = concat_604_axis_0, interleave = concat_604_interleave_0, values = (concat_604_values0_0, end_step_57, concat_604_values2_0))[name = string("concat_604")];
+            tensor<int32, [3]> var_5933_begin_0 = const()[name = string("op_5933_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5933_end_mask_0 = const()[name = string("op_5933_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5933_cast_fp16 = slice_by_index(begin = var_5933_begin_0, end = concat_604, end_mask = var_5933_end_mask_0, x = k_cache_109_cast_fp16)[name = string("op_5933_cast_fp16")];
+            tensor<int32, [3]> var_5936_begin_0 = const()[name = string("op_5936_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5936_end_mask_0 = const()[name = string("op_5936_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_5936_cast_fp16 = slice_by_index(begin = var_5936_begin_0, end = concat_604, end_mask = var_5936_end_mask_0, x = v_cache_109_cast_fp16)[name = string("op_5936_cast_fp16")];
+            tensor<int32, [4]> concat_606x = const()[name = string("concat_606x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5946_cast_fp16 = reshape(shape = concat_606x, x = linear_216_cast_fp16)[name = string("op_5946_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_219_cast_fp16 = mul(x = var_5946_cast_fp16, y = const_268_to_fp16)[name = string("q_219_cast_fp16")];
+            tensor<int32, [4]> concat_607x = const()[name = string("concat_607x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5953_cast_fp16 = reshape(shape = concat_607x, x = var_5933_cast_fp16)[name = string("op_5953_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_275_cast_fp16 = mul(x = var_5953_cast_fp16, y = const_269_to_fp16)[name = string("k_275_cast_fp16")];
+            tensor<int32, [4]> concat_608x = const()[name = string("concat_608x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_5960_cast_fp16 = reshape(shape = concat_608x, x = var_5936_cast_fp16)[name = string("op_5960_cast_fp16")];
+            tensor<int32, [4]> var_5961 = const()[name = string("op_5961"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_163_transpose_x_0 = const()[name = string("qk_163_transpose_x_0"), val = bool(false)];
+            bool qk_163_transpose_y_0 = const()[name = string("qk_163_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_365_perm_0 = const()[name = string("transpose_365_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_366_perm_0 = const()[name = string("transpose_366_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_366 = transpose(perm = transpose_366_perm_0, x = k_275_cast_fp16)[name = string("transpose_422")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_365 = transpose(perm = transpose_365_perm_0, x = q_219_cast_fp16)[name = string("transpose_423")];
+            tensor<fp16, [1, 20, ?, ?]> qk_163_cast_fp16 = matmul(transpose_x = qk_163_transpose_x_0, transpose_y = qk_163_transpose_y_0, x = transpose_365, y = transpose_366)[name = string("qk_163_cast_fp16")];
+            int32 concat_609_values1_0 = const()[name = string("concat_609_values1_0"), val = int32(448)];
+            int32 concat_609_axis_0 = const()[name = string("concat_609_axis_0"), val = int32(0)];
+            bool concat_609_interleave_0 = const()[name = string("concat_609_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_609 = concat(axis = concat_609_axis_0, interleave = concat_609_interleave_0, values = (gather_326_cast_uint16_to_int32, concat_609_values1_0))[name = string("concat_609")];
+            tensor<int32, [2]> var_5964_begin_0 = const()[name = string("op_5964_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5964_end_mask_0 = const()[name = string("op_5964_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5964_cast_fp16 = slice_by_index(begin = var_5964_begin_0, end = concat_609, end_mask = var_5964_end_mask_0, x = mask_to_fp16)[name = string("op_5964_cast_fp16")];
+            int32 concat_610_values0_0 = const()[name = string("concat_610_values0_0"), val = int32(0)];
+            int32 concat_610_axis_0 = const()[name = string("concat_610_axis_0"), val = int32(0)];
+            bool concat_610_interleave_0 = const()[name = string("concat_610_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_610 = concat(axis = concat_610_axis_0, interleave = concat_610_interleave_0, values = (concat_610_values0_0, gather_326_cast_uint16_to_int32))[name = string("concat_610")];
+            tensor<int32, [2]> var_5965_begin_0 = const()[name = string("op_5965_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5965_end_mask_0 = const()[name = string("op_5965_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5965_cast_fp16 = slice_by_index(begin = var_5965_begin_0, end = concat_610, end_mask = var_5965_end_mask_0, x = var_5964_cast_fp16)[name = string("op_5965_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_165_cast_fp16 = add(x = qk_163_cast_fp16, y = var_5965_cast_fp16)[name = string("qk_165_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_5968_cast_fp16 = softmax(axis = var_5877, x = qk_165_cast_fp16)[name = string("op_5968_cast_fp16")];
+            bool var_5970_transpose_x_0 = const()[name = string("op_5970_transpose_x_0"), val = bool(false)];
+            bool var_5970_transpose_y_0 = const()[name = string("op_5970_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_275_cast_fp16 = transpose(perm = var_5961, x = var_5960_cast_fp16)[name = string("transpose_424")];
+            tensor<fp16, [1, 20, ?, 64]> var_5970_cast_fp16 = matmul(transpose_x = var_5970_transpose_x_0, transpose_y = var_5970_transpose_y_0, x = var_5968_cast_fp16, y = v_275_cast_fp16)[name = string("op_5970_cast_fp16")];
+            tensor<int32, [4]> var_5971 = const()[name = string("op_5971"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_611x = const()[name = string("concat_611x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_5972_cast_fp16 = transpose(perm = var_5971, x = var_5970_cast_fp16)[name = string("transpose_421")];
+            tensor<fp16, [1, ?, 1280]> x_493_cast_fp16 = reshape(shape = concat_611x, x = var_5972_cast_fp16)[name = string("x_493_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5976_to_fp16 = const()[name = string("op_5976_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387781888)))];
+            tensor<fp16, [1280]> var_5977_to_fp16 = const()[name = string("op_5977_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391058752)))];
+            tensor<fp16, [1, ?, 1280]> linear_219_cast_fp16 = linear(bias = var_5977_to_fp16, weight = var_5976_to_fp16, x = x_493_cast_fp16)[name = string("linear_219_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_495_cast_fp16 = add(x = x_489_cast_fp16, y = linear_219_cast_fp16)[name = string("x_495_cast_fp16")];
+            tensor<int32, [1]> var_5984_axes_0 = const()[name = string("op_5984_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391061376)))];
+            tensor<fp16, [1280]> blocks_27_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391064000)))];
+            tensor<fp16, [1, ?, 1280]> var_5984_cast_fp16 = layer_norm(axes = var_5984_axes_0, beta = blocks_27_cross_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_cross_attn_ln_weight_to_fp16, x = x_495_cast_fp16)[name = string("op_5984_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_5993_to_fp16 = const()[name = string("op_5993_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391066624)))];
+            tensor<fp16, [1280]> var_5994_to_fp16 = const()[name = string("op_5994_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394343488)))];
+            tensor<fp16, [1, ?, 1280]> linear_220_cast_fp16 = linear(bias = var_5994_to_fp16, weight = var_5993_to_fp16, x = var_5984_cast_fp16)[name = string("linear_220_cast_fp16")];
+            tensor<int32, [3]> concat_612 = const()[name = string("concat_612"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_613 = const()[name = string("concat_613"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_277_internal_tensor_assign_1_stride_0 = const()[name = string("k_277_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_612, begin_mask = k_277_internal_tensor_assign_1_begin_mask_0, end = concat_613, end_mask = k_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_277_internal_tensor_assign_1_squeeze_mask_0, stride = k_277_internal_tensor_assign_1_stride_0, update = k_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("k_277_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_614 = const()[name = string("concat_614"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_615 = const()[name = string("concat_615"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_277_internal_tensor_assign_1_stride_0 = const()[name = string("v_277_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_614, begin_mask = v_277_internal_tensor_assign_1_begin_mask_0, end = concat_615, end_mask = v_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_277_internal_tensor_assign_1_squeeze_mask_0, stride = v_277_internal_tensor_assign_1_stride_0, update = v_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("v_277_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_616x = const()[name = string("concat_616x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6014_cast_fp16 = reshape(shape = concat_616x, x = linear_220_cast_fp16)[name = string("op_6014_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_223_cast_fp16 = mul(x = var_6014_cast_fp16, y = const_270_to_fp16)[name = string("q_223_cast_fp16")];
+            tensor<int32, [4]> var_6020 = const()[name = string("op_6020"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6021_cast_fp16 = reshape(shape = var_6020, x = k_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6021_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_279_cast_fp16 = mul(x = var_6021_cast_fp16, y = const_271_to_fp16)[name = string("k_279_cast_fp16")];
+            tensor<int32, [4]> var_6027 = const()[name = string("op_6027"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6028_cast_fp16 = reshape(shape = var_6027, x = v_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6028_cast_fp16")];
+            tensor<int32, [4]> var_6029 = const()[name = string("op_6029"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_167_transpose_x_0 = const()[name = string("qk_167_transpose_x_0"), val = bool(false)];
+            bool qk_167_transpose_y_0 = const()[name = string("qk_167_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_367_perm_0 = const()[name = string("transpose_367_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_368_perm_0 = const()[name = string("transpose_368_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_368 = transpose(perm = transpose_368_perm_0, x = k_279_cast_fp16)[name = string("transpose_418")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_367 = transpose(perm = transpose_367_perm_0, x = q_223_cast_fp16)[name = string("transpose_419")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_167_cast_fp16 = matmul(transpose_x = qk_167_transpose_x_0, transpose_y = qk_167_transpose_y_0, x = transpose_367, y = transpose_368)[name = string("qk_167_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6033_cast_fp16 = softmax(axis = var_5877, x = qk_167_cast_fp16)[name = string("op_6033_cast_fp16")];
+            bool var_6035_transpose_x_0 = const()[name = string("op_6035_transpose_x_0"), val = bool(false)];
+            bool var_6035_transpose_y_0 = const()[name = string("op_6035_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_279_cast_fp16 = transpose(perm = var_6029, x = var_6028_cast_fp16)[name = string("transpose_420")];
+            tensor<fp16, [1, 20, ?, 64]> var_6035_cast_fp16 = matmul(transpose_x = var_6035_transpose_x_0, transpose_y = var_6035_transpose_y_0, x = var_6033_cast_fp16, y = v_279_cast_fp16)[name = string("op_6035_cast_fp16")];
+            tensor<int32, [4]> var_6036 = const()[name = string("op_6036"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_617x = const()[name = string("concat_617x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6037_cast_fp16 = transpose(perm = var_6036, x = var_6035_cast_fp16)[name = string("transpose_417")];
+            tensor<fp16, [1, ?, 1280]> x_499_cast_fp16 = reshape(shape = concat_617x, x = var_6037_cast_fp16)[name = string("x_499_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6041_to_fp16 = const()[name = string("op_6041_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394346112)))];
+            tensor<fp16, [1280]> var_6042_to_fp16 = const()[name = string("op_6042_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397622976)))];
+            tensor<fp16, [1, ?, 1280]> linear_221_cast_fp16 = linear(bias = var_6042_to_fp16, weight = var_6041_to_fp16, x = x_499_cast_fp16)[name = string("linear_221_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_501_cast_fp16 = add(x = x_495_cast_fp16, y = linear_221_cast_fp16)[name = string("x_501_cast_fp16")];
+            tensor<int32, [1]> var_6049_axes_0 = const()[name = string("op_6049_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_mlp_ln_weight_to_fp16 = const()[name = string("blocks_27_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397625600)))];
+            tensor<fp16, [1280]> blocks_27_mlp_ln_bias_to_fp16 = const()[name = string("blocks_27_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397628224)))];
+            tensor<fp16, [1, ?, 1280]> var_6049_cast_fp16 = layer_norm(axes = var_6049_axes_0, beta = blocks_27_mlp_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_mlp_ln_weight_to_fp16, x = x_501_cast_fp16)[name = string("op_6049_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6058_to_fp16 = const()[name = string("op_6058_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397630848)))];
+            tensor<fp16, [5120]> var_6059_to_fp16 = const()[name = string("op_6059_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410738112)))];
+            tensor<fp16, [1, ?, 5120]> linear_222_cast_fp16 = linear(bias = var_6059_to_fp16, weight = var_6058_to_fp16, x = var_6049_cast_fp16)[name = string("linear_222_cast_fp16")];
+            string x_505_mode_0 = const()[name = string("x_505_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_505_cast_fp16 = gelu(mode = x_505_mode_0, x = linear_222_cast_fp16)[name = string("x_505_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6064_to_fp16 = const()[name = string("op_6064_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410748416)))];
+            tensor<fp16, [1280]> var_6065_to_fp16 = const()[name = string("op_6065_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423855680)))];
+            tensor<fp16, [1, ?, 1280]> linear_223_cast_fp16 = linear(bias = var_6065_to_fp16, weight = var_6064_to_fp16, x = x_505_cast_fp16)[name = string("linear_223_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_507_cast_fp16 = add(x = x_501_cast_fp16, y = linear_223_cast_fp16)[name = string("x_507_cast_fp16")];
+            tensor<int32, [4]> k_cache_113_begin_0 = const()[name = string("k_cache_113_begin_0"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_113_end_0 = const()[name = string("k_cache_113_end_0"), val = tensor<int32, [4]>([29, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_113_end_mask_0 = const()[name = string("k_cache_113_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_113_squeeze_mask_0 = const()[name = string("k_cache_113_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_113_cast_fp16 = slice_by_index(begin = k_cache_113_begin_0, end = k_cache_113_end_0, end_mask = k_cache_113_end_mask_0, squeeze_mask = k_cache_113_squeeze_mask_0, x = coreml_update_state_118)[name = string("k_cache_113_cast_fp16")];
+            tensor<int32, [4]> v_cache_113_begin_0 = const()[name = string("v_cache_113_begin_0"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_113_end_0 = const()[name = string("v_cache_113_end_0"), val = tensor<int32, [4]>([29, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_113_end_mask_0 = const()[name = string("v_cache_113_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_113_squeeze_mask_0 = const()[name = string("v_cache_113_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_113_cast_fp16 = slice_by_index(begin = v_cache_113_begin_0, end = v_cache_113_end_0, end_mask = v_cache_113_end_mask_0, squeeze_mask = v_cache_113_squeeze_mask_0, x = coreml_update_state_119)[name = string("v_cache_113_cast_fp16")];
+            tensor<int32, [4]> k_cache_115_begin_0 = const()[name = string("k_cache_115_begin_0"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_115_end_0 = const()[name = string("k_cache_115_end_0"), val = tensor<int32, [4]>([29, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_115_end_mask_0 = const()[name = string("k_cache_115_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_115_squeeze_mask_0 = const()[name = string("k_cache_115_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_115_cast_fp16 = slice_by_index(begin = k_cache_115_begin_0, end = k_cache_115_end_0, end_mask = k_cache_115_end_mask_0, squeeze_mask = k_cache_115_squeeze_mask_0, x = read_state_2)[name = string("k_cache_115_cast_fp16")];
+            tensor<int32, [4]> v_cache_115_begin_0 = const()[name = string("v_cache_115_begin_0"), val = tensor<int32, [4]>([28, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_115_end_0 = const()[name = string("v_cache_115_end_0"), val = tensor<int32, [4]>([29, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_115_end_mask_0 = const()[name = string("v_cache_115_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_115_squeeze_mask_0 = const()[name = string("v_cache_115_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_115_cast_fp16 = slice_by_index(begin = v_cache_115_begin_0, end = v_cache_115_end_0, end_mask = v_cache_115_end_mask_0, squeeze_mask = v_cache_115_squeeze_mask_0, x = read_state_3)[name = string("v_cache_115_cast_fp16")];
+            int32 var_6088 = const()[name = string("op_6088"), val = int32(-1)];
+            tensor<int32, [1]> var_6106_axes_0 = const()[name = string("op_6106_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423858304)))];
+            tensor<fp16, [1280]> blocks_28_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423860928)))];
+            fp16 var_6094_to_fp16 = const()[name = string("op_6094_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6106_cast_fp16 = layer_norm(axes = var_6106_axes_0, beta = blocks_28_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_attn_ln_weight_to_fp16, x = x_507_cast_fp16)[name = string("op_6106_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6117_to_fp16 = const()[name = string("op_6117_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423863552)))];
+            tensor<fp16, [1280]> var_6118_to_fp16 = const()[name = string("op_6118_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427140416)))];
+            tensor<fp16, [1, ?, 1280]> linear_224_cast_fp16 = linear(bias = var_6118_to_fp16, weight = var_6117_to_fp16, x = var_6106_cast_fp16)[name = string("linear_224_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6121_to_fp16 = const()[name = string("op_6121_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427143040)))];
+            tensor<fp16, [1, ?, 1280]> linear_225_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6121_to_fp16, x = var_6106_cast_fp16)[name = string("linear_225_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6125_to_fp16 = const()[name = string("op_6125_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1430419904)))];
+            tensor<fp16, [1280]> var_6126_to_fp16 = const()[name = string("op_6126_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433696768)))];
+            tensor<fp16, [1, ?, 1280]> linear_226_cast_fp16 = linear(bias = var_6126_to_fp16, weight = var_6125_to_fp16, x = var_6106_cast_fp16)[name = string("linear_226_cast_fp16")];
+            tensor<int32, [3]> var_6128_shape_cast_fp16 = shape(x = linear_224_cast_fp16)[name = string("op_6128_shape_cast_fp16")];
+            int32 gather_338_axis_0 = const()[name = string("gather_338_axis_0"), val = int32(0)];
+            int32 gather_338_batch_dims_0 = const()[name = string("gather_338_batch_dims_0"), val = int32(0)];
+            bool gather_338_validate_indices_0 = const()[name = string("gather_338_validate_indices_0"), val = bool(false)];
+            string var_6128_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6128_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_338_to_uint16 = const()[name = string("select_338_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_6128_shape_cast_fp16_to_uint16 = cast(dtype = var_6128_shape_cast_fp16_to_uint16_dtype_0, x = var_6128_shape_cast_fp16)[name = string("cast_334")];
+            uint16 gather_338_cast_uint16 = gather(axis = gather_338_axis_0, batch_dims = gather_338_batch_dims_0, indices = select_338_to_uint16, validate_indices = gather_338_validate_indices_0, x = var_6128_shape_cast_fp16_to_uint16)[name = string("gather_338_cast_uint16")];
+            string gather_338_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_338_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_338_cast_uint16_to_int32 = cast(dtype = gather_338_cast_uint16_to_int32_dtype_0, x = gather_338_cast_uint16)[name = string("cast_333")];
+            int32 end_step_59 = add(x = offset, y = gather_338_cast_uint16_to_int32)[name = string("end_step_59")];
+            tensor<int32, [1]> expand_dims_448 = const()[name = string("expand_dims_448"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_450 = const()[name = string("expand_dims_450"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_451_axes_0 = const()[name = string("expand_dims_451_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_451 = expand_dims(axes = expand_dims_451_axes_0, x = end_step_59)[name = string("expand_dims_451")];
+            tensor<int32, [1]> concat_620_values0_0 = const()[name = string("concat_620_values0_0"), val = tensor<int32, [1]>([28])];
+            int32 concat_620_axis_0 = const()[name = string("concat_620_axis_0"), val = int32(0)];
+            bool concat_620_interleave_0 = const()[name = string("concat_620_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_620 = concat(axis = concat_620_axis_0, interleave = concat_620_interleave_0, values = (concat_620_values0_0, expand_dims_448, expand_dims_1, expand_dims_450))[name = string("concat_620")];
+            tensor<int32, [1]> concat_621_values0_0 = const()[name = string("concat_621_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_621_values1_0 = const()[name = string("concat_621_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_621_values3_0 = const()[name = string("concat_621_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_621_axis_0 = const()[name = string("concat_621_axis_0"), val = int32(0)];
+            bool concat_621_interleave_0 = const()[name = string("concat_621_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_621 = concat(axis = concat_621_axis_0, interleave = concat_621_interleave_0, values = (concat_621_values0_0, concat_621_values1_0, expand_dims_451, concat_621_values3_0))[name = string("concat_621")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_29_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = k_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = k_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_29_stride_0, update = linear_225_cast_fp16, x = coreml_update_state_118)[name = string("k_cache1_internal_tensor_assign_29_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_29_cast_fp16, input = k_cache1)[name = string("coreml_update_state_120_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_120 = read_state(input = k_cache1)[name = string("coreml_update_state_120")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_29_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = v_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = v_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_29_stride_0, update = linear_226_cast_fp16, x = coreml_update_state_119)[name = string("v_cache1_internal_tensor_assign_29_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_29_cast_fp16, input = v_cache1)[name = string("coreml_update_state_121_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_121 = read_state(input = v_cache1)[name = string("coreml_update_state_121")];
+            int32 concat_626_values0_0 = const()[name = string("concat_626_values0_0"), val = int32(1)];
+            int32 concat_626_values2_0 = const()[name = string("concat_626_values2_0"), val = int32(1280)];
+            int32 concat_626_axis_0 = const()[name = string("concat_626_axis_0"), val = int32(0)];
+            bool concat_626_interleave_0 = const()[name = string("concat_626_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_626 = concat(axis = concat_626_axis_0, interleave = concat_626_interleave_0, values = (concat_626_values0_0, end_step_59, concat_626_values2_0))[name = string("concat_626")];
+            tensor<int32, [3]> var_6144_begin_0 = const()[name = string("op_6144_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6144_end_mask_0 = const()[name = string("op_6144_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6144_cast_fp16 = slice_by_index(begin = var_6144_begin_0, end = concat_626, end_mask = var_6144_end_mask_0, x = k_cache_113_cast_fp16)[name = string("op_6144_cast_fp16")];
+            tensor<int32, [3]> var_6147_begin_0 = const()[name = string("op_6147_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6147_end_mask_0 = const()[name = string("op_6147_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6147_cast_fp16 = slice_by_index(begin = var_6147_begin_0, end = concat_626, end_mask = var_6147_end_mask_0, x = v_cache_113_cast_fp16)[name = string("op_6147_cast_fp16")];
+            tensor<int32, [4]> concat_628x = const()[name = string("concat_628x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6157_cast_fp16 = reshape(shape = concat_628x, x = linear_224_cast_fp16)[name = string("op_6157_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_227_cast_fp16 = mul(x = var_6157_cast_fp16, y = const_272_to_fp16)[name = string("q_227_cast_fp16")];
+            tensor<int32, [4]> concat_629x = const()[name = string("concat_629x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6164_cast_fp16 = reshape(shape = concat_629x, x = var_6144_cast_fp16)[name = string("op_6164_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_285_cast_fp16 = mul(x = var_6164_cast_fp16, y = const_273_to_fp16)[name = string("k_285_cast_fp16")];
+            tensor<int32, [4]> concat_630x = const()[name = string("concat_630x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6171_cast_fp16 = reshape(shape = concat_630x, x = var_6147_cast_fp16)[name = string("op_6171_cast_fp16")];
+            tensor<int32, [4]> var_6172 = const()[name = string("op_6172"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_169_transpose_x_0 = const()[name = string("qk_169_transpose_x_0"), val = bool(false)];
+            bool qk_169_transpose_y_0 = const()[name = string("qk_169_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_369_perm_0 = const()[name = string("transpose_369_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_370_perm_0 = const()[name = string("transpose_370_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_370 = transpose(perm = transpose_370_perm_0, x = k_285_cast_fp16)[name = string("transpose_414")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_369 = transpose(perm = transpose_369_perm_0, x = q_227_cast_fp16)[name = string("transpose_415")];
+            tensor<fp16, [1, 20, ?, ?]> qk_169_cast_fp16 = matmul(transpose_x = qk_169_transpose_x_0, transpose_y = qk_169_transpose_y_0, x = transpose_369, y = transpose_370)[name = string("qk_169_cast_fp16")];
+            int32 concat_631_values1_0 = const()[name = string("concat_631_values1_0"), val = int32(448)];
+            int32 concat_631_axis_0 = const()[name = string("concat_631_axis_0"), val = int32(0)];
+            bool concat_631_interleave_0 = const()[name = string("concat_631_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_631 = concat(axis = concat_631_axis_0, interleave = concat_631_interleave_0, values = (gather_338_cast_uint16_to_int32, concat_631_values1_0))[name = string("concat_631")];
+            tensor<int32, [2]> var_6175_begin_0 = const()[name = string("op_6175_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6175_end_mask_0 = const()[name = string("op_6175_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_6175_cast_fp16 = slice_by_index(begin = var_6175_begin_0, end = concat_631, end_mask = var_6175_end_mask_0, x = mask_to_fp16)[name = string("op_6175_cast_fp16")];
+            int32 concat_632_values0_0 = const()[name = string("concat_632_values0_0"), val = int32(0)];
+            int32 concat_632_axis_0 = const()[name = string("concat_632_axis_0"), val = int32(0)];
+            bool concat_632_interleave_0 = const()[name = string("concat_632_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_632 = concat(axis = concat_632_axis_0, interleave = concat_632_interleave_0, values = (concat_632_values0_0, gather_338_cast_uint16_to_int32))[name = string("concat_632")];
+            tensor<int32, [2]> var_6176_begin_0 = const()[name = string("op_6176_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6176_end_mask_0 = const()[name = string("op_6176_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_6176_cast_fp16 = slice_by_index(begin = var_6176_begin_0, end = concat_632, end_mask = var_6176_end_mask_0, x = var_6175_cast_fp16)[name = string("op_6176_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_171_cast_fp16 = add(x = qk_169_cast_fp16, y = var_6176_cast_fp16)[name = string("qk_171_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_6179_cast_fp16 = softmax(axis = var_6088, x = qk_171_cast_fp16)[name = string("op_6179_cast_fp16")];
+            bool var_6181_transpose_x_0 = const()[name = string("op_6181_transpose_x_0"), val = bool(false)];
+            bool var_6181_transpose_y_0 = const()[name = string("op_6181_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_285_cast_fp16 = transpose(perm = var_6172, x = var_6171_cast_fp16)[name = string("transpose_416")];
+            tensor<fp16, [1, 20, ?, 64]> var_6181_cast_fp16 = matmul(transpose_x = var_6181_transpose_x_0, transpose_y = var_6181_transpose_y_0, x = var_6179_cast_fp16, y = v_285_cast_fp16)[name = string("op_6181_cast_fp16")];
+            tensor<int32, [4]> var_6182 = const()[name = string("op_6182"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_633x = const()[name = string("concat_633x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6183_cast_fp16 = transpose(perm = var_6182, x = var_6181_cast_fp16)[name = string("transpose_413")];
+            tensor<fp16, [1, ?, 1280]> x_511_cast_fp16 = reshape(shape = concat_633x, x = var_6183_cast_fp16)[name = string("x_511_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6187_to_fp16 = const()[name = string("op_6187_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433699392)))];
+            tensor<fp16, [1280]> var_6188_to_fp16 = const()[name = string("op_6188_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436976256)))];
+            tensor<fp16, [1, ?, 1280]> linear_227_cast_fp16 = linear(bias = var_6188_to_fp16, weight = var_6187_to_fp16, x = x_511_cast_fp16)[name = string("linear_227_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_513_cast_fp16 = add(x = x_507_cast_fp16, y = linear_227_cast_fp16)[name = string("x_513_cast_fp16")];
+            tensor<int32, [1]> var_6195_axes_0 = const()[name = string("op_6195_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436978880)))];
+            tensor<fp16, [1280]> blocks_28_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436981504)))];
+            tensor<fp16, [1, ?, 1280]> var_6195_cast_fp16 = layer_norm(axes = var_6195_axes_0, beta = blocks_28_cross_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_cross_attn_ln_weight_to_fp16, x = x_513_cast_fp16)[name = string("op_6195_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6204_to_fp16 = const()[name = string("op_6204_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436984128)))];
+            tensor<fp16, [1280]> var_6205_to_fp16 = const()[name = string("op_6205_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440260992)))];
+            tensor<fp16, [1, ?, 1280]> linear_228_cast_fp16 = linear(bias = var_6205_to_fp16, weight = var_6204_to_fp16, x = var_6195_cast_fp16)[name = string("linear_228_cast_fp16")];
+            tensor<int32, [3]> concat_634 = const()[name = string("concat_634"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_635 = const()[name = string("concat_635"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_287_internal_tensor_assign_1_stride_0 = const()[name = string("k_287_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_634, begin_mask = k_287_internal_tensor_assign_1_begin_mask_0, end = concat_635, end_mask = k_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_287_internal_tensor_assign_1_squeeze_mask_0, stride = k_287_internal_tensor_assign_1_stride_0, update = k_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("k_287_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_636 = const()[name = string("concat_636"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_637 = const()[name = string("concat_637"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_287_internal_tensor_assign_1_stride_0 = const()[name = string("v_287_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_636, begin_mask = v_287_internal_tensor_assign_1_begin_mask_0, end = concat_637, end_mask = v_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_287_internal_tensor_assign_1_squeeze_mask_0, stride = v_287_internal_tensor_assign_1_stride_0, update = v_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("v_287_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_638x = const()[name = string("concat_638x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6225_cast_fp16 = reshape(shape = concat_638x, x = linear_228_cast_fp16)[name = string("op_6225_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_231_cast_fp16 = mul(x = var_6225_cast_fp16, y = const_274_to_fp16)[name = string("q_231_cast_fp16")];
+            tensor<int32, [4]> var_6231 = const()[name = string("op_6231"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6232_cast_fp16 = reshape(shape = var_6231, x = k_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6232_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_275_to_fp16 = const()[name = string("const_275_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_289_cast_fp16 = mul(x = var_6232_cast_fp16, y = const_275_to_fp16)[name = string("k_289_cast_fp16")];
+            tensor<int32, [4]> var_6238 = const()[name = string("op_6238"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6239_cast_fp16 = reshape(shape = var_6238, x = v_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6239_cast_fp16")];
+            tensor<int32, [4]> var_6240 = const()[name = string("op_6240"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_173_transpose_x_0 = const()[name = string("qk_173_transpose_x_0"), val = bool(false)];
+            bool qk_173_transpose_y_0 = const()[name = string("qk_173_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_371_perm_0 = const()[name = string("transpose_371_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_372_perm_0 = const()[name = string("transpose_372_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_372 = transpose(perm = transpose_372_perm_0, x = k_289_cast_fp16)[name = string("transpose_410")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_371 = transpose(perm = transpose_371_perm_0, x = q_231_cast_fp16)[name = string("transpose_411")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_173_cast_fp16 = matmul(transpose_x = qk_173_transpose_x_0, transpose_y = qk_173_transpose_y_0, x = transpose_371, y = transpose_372)[name = string("qk_173_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6244_cast_fp16 = softmax(axis = var_6088, x = qk_173_cast_fp16)[name = string("op_6244_cast_fp16")];
+            bool var_6246_transpose_x_0 = const()[name = string("op_6246_transpose_x_0"), val = bool(false)];
+            bool var_6246_transpose_y_0 = const()[name = string("op_6246_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_289_cast_fp16 = transpose(perm = var_6240, x = var_6239_cast_fp16)[name = string("transpose_412")];
+            tensor<fp16, [1, 20, ?, 64]> var_6246_cast_fp16 = matmul(transpose_x = var_6246_transpose_x_0, transpose_y = var_6246_transpose_y_0, x = var_6244_cast_fp16, y = v_289_cast_fp16)[name = string("op_6246_cast_fp16")];
+            tensor<int32, [4]> var_6247 = const()[name = string("op_6247"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_639x = const()[name = string("concat_639x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6248_cast_fp16 = transpose(perm = var_6247, x = var_6246_cast_fp16)[name = string("transpose_409")];
+            tensor<fp16, [1, ?, 1280]> x_517_cast_fp16 = reshape(shape = concat_639x, x = var_6248_cast_fp16)[name = string("x_517_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6252_to_fp16 = const()[name = string("op_6252_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440263616)))];
+            tensor<fp16, [1280]> var_6253_to_fp16 = const()[name = string("op_6253_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443540480)))];
+            tensor<fp16, [1, ?, 1280]> linear_229_cast_fp16 = linear(bias = var_6253_to_fp16, weight = var_6252_to_fp16, x = x_517_cast_fp16)[name = string("linear_229_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_519_cast_fp16 = add(x = x_513_cast_fp16, y = linear_229_cast_fp16)[name = string("x_519_cast_fp16")];
+            tensor<int32, [1]> var_6260_axes_0 = const()[name = string("op_6260_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_mlp_ln_weight_to_fp16 = const()[name = string("blocks_28_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443543104)))];
+            tensor<fp16, [1280]> blocks_28_mlp_ln_bias_to_fp16 = const()[name = string("blocks_28_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443545728)))];
+            tensor<fp16, [1, ?, 1280]> var_6260_cast_fp16 = layer_norm(axes = var_6260_axes_0, beta = blocks_28_mlp_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_mlp_ln_weight_to_fp16, x = x_519_cast_fp16)[name = string("op_6260_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6269_to_fp16 = const()[name = string("op_6269_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443548352)))];
+            tensor<fp16, [5120]> var_6270_to_fp16 = const()[name = string("op_6270_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456655616)))];
+            tensor<fp16, [1, ?, 5120]> linear_230_cast_fp16 = linear(bias = var_6270_to_fp16, weight = var_6269_to_fp16, x = var_6260_cast_fp16)[name = string("linear_230_cast_fp16")];
+            string x_523_mode_0 = const()[name = string("x_523_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_523_cast_fp16 = gelu(mode = x_523_mode_0, x = linear_230_cast_fp16)[name = string("x_523_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6275_to_fp16 = const()[name = string("op_6275_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456665920)))];
+            tensor<fp16, [1280]> var_6276_to_fp16 = const()[name = string("op_6276_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469773184)))];
+            tensor<fp16, [1, ?, 1280]> linear_231_cast_fp16 = linear(bias = var_6276_to_fp16, weight = var_6275_to_fp16, x = x_523_cast_fp16)[name = string("linear_231_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_525_cast_fp16 = add(x = x_519_cast_fp16, y = linear_231_cast_fp16)[name = string("x_525_cast_fp16")];
+            tensor<int32, [4]> k_cache_117_begin_0 = const()[name = string("k_cache_117_begin_0"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_117_end_0 = const()[name = string("k_cache_117_end_0"), val = tensor<int32, [4]>([30, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_117_end_mask_0 = const()[name = string("k_cache_117_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_117_squeeze_mask_0 = const()[name = string("k_cache_117_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_117_cast_fp16 = slice_by_index(begin = k_cache_117_begin_0, end = k_cache_117_end_0, end_mask = k_cache_117_end_mask_0, squeeze_mask = k_cache_117_squeeze_mask_0, x = coreml_update_state_120)[name = string("k_cache_117_cast_fp16")];
+            tensor<int32, [4]> v_cache_117_begin_0 = const()[name = string("v_cache_117_begin_0"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_117_end_0 = const()[name = string("v_cache_117_end_0"), val = tensor<int32, [4]>([30, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_117_end_mask_0 = const()[name = string("v_cache_117_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_117_squeeze_mask_0 = const()[name = string("v_cache_117_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_117_cast_fp16 = slice_by_index(begin = v_cache_117_begin_0, end = v_cache_117_end_0, end_mask = v_cache_117_end_mask_0, squeeze_mask = v_cache_117_squeeze_mask_0, x = coreml_update_state_121)[name = string("v_cache_117_cast_fp16")];
+            tensor<int32, [4]> k_cache_119_begin_0 = const()[name = string("k_cache_119_begin_0"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_119_end_0 = const()[name = string("k_cache_119_end_0"), val = tensor<int32, [4]>([30, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_119_end_mask_0 = const()[name = string("k_cache_119_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_119_squeeze_mask_0 = const()[name = string("k_cache_119_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_119_cast_fp16 = slice_by_index(begin = k_cache_119_begin_0, end = k_cache_119_end_0, end_mask = k_cache_119_end_mask_0, squeeze_mask = k_cache_119_squeeze_mask_0, x = read_state_2)[name = string("k_cache_119_cast_fp16")];
+            tensor<int32, [4]> v_cache_119_begin_0 = const()[name = string("v_cache_119_begin_0"), val = tensor<int32, [4]>([29, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_119_end_0 = const()[name = string("v_cache_119_end_0"), val = tensor<int32, [4]>([30, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_119_end_mask_0 = const()[name = string("v_cache_119_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_119_squeeze_mask_0 = const()[name = string("v_cache_119_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_119_cast_fp16 = slice_by_index(begin = v_cache_119_begin_0, end = v_cache_119_end_0, end_mask = v_cache_119_end_mask_0, squeeze_mask = v_cache_119_squeeze_mask_0, x = read_state_3)[name = string("v_cache_119_cast_fp16")];
+            int32 var_6299 = const()[name = string("op_6299"), val = int32(-1)];
+            tensor<int32, [1]> var_6317_axes_0 = const()[name = string("op_6317_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469775808)))];
+            tensor<fp16, [1280]> blocks_29_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469778432)))];
+            fp16 var_6305_to_fp16 = const()[name = string("op_6305_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6317_cast_fp16 = layer_norm(axes = var_6317_axes_0, beta = blocks_29_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_attn_ln_weight_to_fp16, x = x_525_cast_fp16)[name = string("op_6317_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6328_to_fp16 = const()[name = string("op_6328_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469781056)))];
+            tensor<fp16, [1280]> var_6329_to_fp16 = const()[name = string("op_6329_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473057920)))];
+            tensor<fp16, [1, ?, 1280]> linear_232_cast_fp16 = linear(bias = var_6329_to_fp16, weight = var_6328_to_fp16, x = var_6317_cast_fp16)[name = string("linear_232_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473060544)))];
+            tensor<fp16, [1, ?, 1280]> linear_233_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6332_to_fp16, x = var_6317_cast_fp16)[name = string("linear_233_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6336_to_fp16 = const()[name = string("op_6336_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1476337408)))];
+            tensor<fp16, [1280]> var_6337_to_fp16 = const()[name = string("op_6337_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479614272)))];
+            tensor<fp16, [1, ?, 1280]> linear_234_cast_fp16 = linear(bias = var_6337_to_fp16, weight = var_6336_to_fp16, x = var_6317_cast_fp16)[name = string("linear_234_cast_fp16")];
+            tensor<int32, [3]> var_6339_shape_cast_fp16 = shape(x = linear_232_cast_fp16)[name = string("op_6339_shape_cast_fp16")];
+            int32 gather_350_axis_0 = const()[name = string("gather_350_axis_0"), val = int32(0)];
+            int32 gather_350_batch_dims_0 = const()[name = string("gather_350_batch_dims_0"), val = int32(0)];
+            bool gather_350_validate_indices_0 = const()[name = string("gather_350_validate_indices_0"), val = bool(false)];
+            string var_6339_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6339_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_350_to_uint16 = const()[name = string("select_350_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_6339_shape_cast_fp16_to_uint16 = cast(dtype = var_6339_shape_cast_fp16_to_uint16_dtype_0, x = var_6339_shape_cast_fp16)[name = string("cast_332")];
+            uint16 gather_350_cast_uint16 = gather(axis = gather_350_axis_0, batch_dims = gather_350_batch_dims_0, indices = select_350_to_uint16, validate_indices = gather_350_validate_indices_0, x = var_6339_shape_cast_fp16_to_uint16)[name = string("gather_350_cast_uint16")];
+            string gather_350_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_350_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_350_cast_uint16_to_int32 = cast(dtype = gather_350_cast_uint16_to_int32_dtype_0, x = gather_350_cast_uint16)[name = string("cast_331")];
+            int32 end_step_61 = add(x = offset, y = gather_350_cast_uint16_to_int32)[name = string("end_step_61")];
+            tensor<int32, [1]> expand_dims_464 = const()[name = string("expand_dims_464"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_466 = const()[name = string("expand_dims_466"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_467_axes_0 = const()[name = string("expand_dims_467_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_467 = expand_dims(axes = expand_dims_467_axes_0, x = end_step_61)[name = string("expand_dims_467")];
+            tensor<int32, [1]> concat_642_values0_0 = const()[name = string("concat_642_values0_0"), val = tensor<int32, [1]>([29])];
+            int32 concat_642_axis_0 = const()[name = string("concat_642_axis_0"), val = int32(0)];
+            bool concat_642_interleave_0 = const()[name = string("concat_642_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_642 = concat(axis = concat_642_axis_0, interleave = concat_642_interleave_0, values = (concat_642_values0_0, expand_dims_464, expand_dims_1, expand_dims_466))[name = string("concat_642")];
+            tensor<int32, [1]> concat_643_values0_0 = const()[name = string("concat_643_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_643_values1_0 = const()[name = string("concat_643_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_643_values3_0 = const()[name = string("concat_643_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_643_axis_0 = const()[name = string("concat_643_axis_0"), val = int32(0)];
+            bool concat_643_interleave_0 = const()[name = string("concat_643_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_643 = concat(axis = concat_643_axis_0, interleave = concat_643_interleave_0, values = (concat_643_values0_0, concat_643_values1_0, expand_dims_467, concat_643_values3_0))[name = string("concat_643")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_30_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = k_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = k_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_30_stride_0, update = linear_233_cast_fp16, x = coreml_update_state_120)[name = string("k_cache1_internal_tensor_assign_30_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_30_cast_fp16, input = k_cache1)[name = string("coreml_update_state_122_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_122 = read_state(input = k_cache1)[name = string("coreml_update_state_122")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_30_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = v_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = v_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_30_stride_0, update = linear_234_cast_fp16, x = coreml_update_state_121)[name = string("v_cache1_internal_tensor_assign_30_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_30_cast_fp16, input = v_cache1)[name = string("coreml_update_state_123_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_123 = read_state(input = v_cache1)[name = string("coreml_update_state_123")];
+            int32 concat_648_values0_0 = const()[name = string("concat_648_values0_0"), val = int32(1)];
+            int32 concat_648_values2_0 = const()[name = string("concat_648_values2_0"), val = int32(1280)];
+            int32 concat_648_axis_0 = const()[name = string("concat_648_axis_0"), val = int32(0)];
+            bool concat_648_interleave_0 = const()[name = string("concat_648_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_648 = concat(axis = concat_648_axis_0, interleave = concat_648_interleave_0, values = (concat_648_values0_0, end_step_61, concat_648_values2_0))[name = string("concat_648")];
+            tensor<int32, [3]> var_6355_begin_0 = const()[name = string("op_6355_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6355_end_mask_0 = const()[name = string("op_6355_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6355_cast_fp16 = slice_by_index(begin = var_6355_begin_0, end = concat_648, end_mask = var_6355_end_mask_0, x = k_cache_117_cast_fp16)[name = string("op_6355_cast_fp16")];
+            tensor<int32, [3]> var_6358_begin_0 = const()[name = string("op_6358_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6358_end_mask_0 = const()[name = string("op_6358_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6358_cast_fp16 = slice_by_index(begin = var_6358_begin_0, end = concat_648, end_mask = var_6358_end_mask_0, x = v_cache_117_cast_fp16)[name = string("op_6358_cast_fp16")];
+            tensor<int32, [4]> concat_650x = const()[name = string("concat_650x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6368_cast_fp16 = reshape(shape = concat_650x, x = linear_232_cast_fp16)[name = string("op_6368_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_235_cast_fp16 = mul(x = var_6368_cast_fp16, y = const_276_to_fp16)[name = string("q_235_cast_fp16")];
+            tensor<int32, [4]> concat_651x = const()[name = string("concat_651x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6375_cast_fp16 = reshape(shape = concat_651x, x = var_6355_cast_fp16)[name = string("op_6375_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_295_cast_fp16 = mul(x = var_6375_cast_fp16, y = const_277_to_fp16)[name = string("k_295_cast_fp16")];
+            tensor<int32, [4]> concat_652x = const()[name = string("concat_652x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6382_cast_fp16 = reshape(shape = concat_652x, x = var_6358_cast_fp16)[name = string("op_6382_cast_fp16")];
+            tensor<int32, [4]> var_6383 = const()[name = string("op_6383"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_175_transpose_x_0 = const()[name = string("qk_175_transpose_x_0"), val = bool(false)];
+            bool qk_175_transpose_y_0 = const()[name = string("qk_175_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_373_perm_0 = const()[name = string("transpose_373_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_374_perm_0 = const()[name = string("transpose_374_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_374 = transpose(perm = transpose_374_perm_0, x = k_295_cast_fp16)[name = string("transpose_406")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_373 = transpose(perm = transpose_373_perm_0, x = q_235_cast_fp16)[name = string("transpose_407")];
+            tensor<fp16, [1, 20, ?, ?]> qk_175_cast_fp16 = matmul(transpose_x = qk_175_transpose_x_0, transpose_y = qk_175_transpose_y_0, x = transpose_373, y = transpose_374)[name = string("qk_175_cast_fp16")];
+            int32 concat_653_values1_0 = const()[name = string("concat_653_values1_0"), val = int32(448)];
+            int32 concat_653_axis_0 = const()[name = string("concat_653_axis_0"), val = int32(0)];
+            bool concat_653_interleave_0 = const()[name = string("concat_653_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_653 = concat(axis = concat_653_axis_0, interleave = concat_653_interleave_0, values = (gather_350_cast_uint16_to_int32, concat_653_values1_0))[name = string("concat_653")];
+            tensor<int32, [2]> var_6386_begin_0 = const()[name = string("op_6386_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6386_end_mask_0 = const()[name = string("op_6386_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_6386_cast_fp16 = slice_by_index(begin = var_6386_begin_0, end = concat_653, end_mask = var_6386_end_mask_0, x = mask_to_fp16)[name = string("op_6386_cast_fp16")];
+            int32 concat_654_values0_0 = const()[name = string("concat_654_values0_0"), val = int32(0)];
+            int32 concat_654_axis_0 = const()[name = string("concat_654_axis_0"), val = int32(0)];
+            bool concat_654_interleave_0 = const()[name = string("concat_654_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_654 = concat(axis = concat_654_axis_0, interleave = concat_654_interleave_0, values = (concat_654_values0_0, gather_350_cast_uint16_to_int32))[name = string("concat_654")];
+            tensor<int32, [2]> var_6387_begin_0 = const()[name = string("op_6387_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6387_end_mask_0 = const()[name = string("op_6387_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_6387_cast_fp16 = slice_by_index(begin = var_6387_begin_0, end = concat_654, end_mask = var_6387_end_mask_0, x = var_6386_cast_fp16)[name = string("op_6387_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_177_cast_fp16 = add(x = qk_175_cast_fp16, y = var_6387_cast_fp16)[name = string("qk_177_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_6390_cast_fp16 = softmax(axis = var_6299, x = qk_177_cast_fp16)[name = string("op_6390_cast_fp16")];
+            bool var_6392_transpose_x_0 = const()[name = string("op_6392_transpose_x_0"), val = bool(false)];
+            bool var_6392_transpose_y_0 = const()[name = string("op_6392_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_295_cast_fp16 = transpose(perm = var_6383, x = var_6382_cast_fp16)[name = string("transpose_408")];
+            tensor<fp16, [1, 20, ?, 64]> var_6392_cast_fp16 = matmul(transpose_x = var_6392_transpose_x_0, transpose_y = var_6392_transpose_y_0, x = var_6390_cast_fp16, y = v_295_cast_fp16)[name = string("op_6392_cast_fp16")];
+            tensor<int32, [4]> var_6393 = const()[name = string("op_6393"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_655x = const()[name = string("concat_655x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6394_cast_fp16 = transpose(perm = var_6393, x = var_6392_cast_fp16)[name = string("transpose_405")];
+            tensor<fp16, [1, ?, 1280]> x_529_cast_fp16 = reshape(shape = concat_655x, x = var_6394_cast_fp16)[name = string("x_529_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6398_to_fp16 = const()[name = string("op_6398_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479616896)))];
+            tensor<fp16, [1280]> var_6399_to_fp16 = const()[name = string("op_6399_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482893760)))];
+            tensor<fp16, [1, ?, 1280]> linear_235_cast_fp16 = linear(bias = var_6399_to_fp16, weight = var_6398_to_fp16, x = x_529_cast_fp16)[name = string("linear_235_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_531_cast_fp16 = add(x = x_525_cast_fp16, y = linear_235_cast_fp16)[name = string("x_531_cast_fp16")];
+            tensor<int32, [1]> var_6406_axes_0 = const()[name = string("op_6406_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482896384)))];
+            tensor<fp16, [1280]> blocks_29_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482899008)))];
+            tensor<fp16, [1, ?, 1280]> var_6406_cast_fp16 = layer_norm(axes = var_6406_axes_0, beta = blocks_29_cross_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_cross_attn_ln_weight_to_fp16, x = x_531_cast_fp16)[name = string("op_6406_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6415_to_fp16 = const()[name = string("op_6415_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482901632)))];
+            tensor<fp16, [1280]> var_6416_to_fp16 = const()[name = string("op_6416_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486178496)))];
+            tensor<fp16, [1, ?, 1280]> linear_236_cast_fp16 = linear(bias = var_6416_to_fp16, weight = var_6415_to_fp16, x = var_6406_cast_fp16)[name = string("linear_236_cast_fp16")];
+            tensor<int32, [3]> concat_656 = const()[name = string("concat_656"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_657 = const()[name = string("concat_657"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_297_internal_tensor_assign_1_stride_0 = const()[name = string("k_297_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_656, begin_mask = k_297_internal_tensor_assign_1_begin_mask_0, end = concat_657, end_mask = k_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_297_internal_tensor_assign_1_squeeze_mask_0, stride = k_297_internal_tensor_assign_1_stride_0, update = k_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("k_297_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_658 = const()[name = string("concat_658"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_659 = const()[name = string("concat_659"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_297_internal_tensor_assign_1_stride_0 = const()[name = string("v_297_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_658, begin_mask = v_297_internal_tensor_assign_1_begin_mask_0, end = concat_659, end_mask = v_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_297_internal_tensor_assign_1_squeeze_mask_0, stride = v_297_internal_tensor_assign_1_stride_0, update = v_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("v_297_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_660x = const()[name = string("concat_660x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6436_cast_fp16 = reshape(shape = concat_660x, x = linear_236_cast_fp16)[name = string("op_6436_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_278_to_fp16 = const()[name = string("const_278_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_239_cast_fp16 = mul(x = var_6436_cast_fp16, y = const_278_to_fp16)[name = string("q_239_cast_fp16")];
+            tensor<int32, [4]> var_6442 = const()[name = string("op_6442"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6443_cast_fp16 = reshape(shape = var_6442, x = k_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6443_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_299_cast_fp16 = mul(x = var_6443_cast_fp16, y = const_279_to_fp16)[name = string("k_299_cast_fp16")];
+            tensor<int32, [4]> var_6449 = const()[name = string("op_6449"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6450_cast_fp16 = reshape(shape = var_6449, x = v_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6450_cast_fp16")];
+            tensor<int32, [4]> var_6451 = const()[name = string("op_6451"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_179_transpose_x_0 = const()[name = string("qk_179_transpose_x_0"), val = bool(false)];
+            bool qk_179_transpose_y_0 = const()[name = string("qk_179_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_375_perm_0 = const()[name = string("transpose_375_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_376_perm_0 = const()[name = string("transpose_376_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_376 = transpose(perm = transpose_376_perm_0, x = k_299_cast_fp16)[name = string("transpose_402")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_375 = transpose(perm = transpose_375_perm_0, x = q_239_cast_fp16)[name = string("transpose_403")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_179_cast_fp16 = matmul(transpose_x = qk_179_transpose_x_0, transpose_y = qk_179_transpose_y_0, x = transpose_375, y = transpose_376)[name = string("qk_179_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6455_cast_fp16 = softmax(axis = var_6299, x = qk_179_cast_fp16)[name = string("op_6455_cast_fp16")];
+            bool var_6457_transpose_x_0 = const()[name = string("op_6457_transpose_x_0"), val = bool(false)];
+            bool var_6457_transpose_y_0 = const()[name = string("op_6457_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_299_cast_fp16 = transpose(perm = var_6451, x = var_6450_cast_fp16)[name = string("transpose_404")];
+            tensor<fp16, [1, 20, ?, 64]> var_6457_cast_fp16 = matmul(transpose_x = var_6457_transpose_x_0, transpose_y = var_6457_transpose_y_0, x = var_6455_cast_fp16, y = v_299_cast_fp16)[name = string("op_6457_cast_fp16")];
+            tensor<int32, [4]> var_6458 = const()[name = string("op_6458"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_661x = const()[name = string("concat_661x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6459_cast_fp16 = transpose(perm = var_6458, x = var_6457_cast_fp16)[name = string("transpose_401")];
+            tensor<fp16, [1, ?, 1280]> x_535_cast_fp16 = reshape(shape = concat_661x, x = var_6459_cast_fp16)[name = string("x_535_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6463_to_fp16 = const()[name = string("op_6463_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486181120)))];
+            tensor<fp16, [1280]> var_6464_to_fp16 = const()[name = string("op_6464_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489457984)))];
+            tensor<fp16, [1, ?, 1280]> linear_237_cast_fp16 = linear(bias = var_6464_to_fp16, weight = var_6463_to_fp16, x = x_535_cast_fp16)[name = string("linear_237_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_537_cast_fp16 = add(x = x_531_cast_fp16, y = linear_237_cast_fp16)[name = string("x_537_cast_fp16")];
+            tensor<int32, [1]> var_6471_axes_0 = const()[name = string("op_6471_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_mlp_ln_weight_to_fp16 = const()[name = string("blocks_29_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489460608)))];
+            tensor<fp16, [1280]> blocks_29_mlp_ln_bias_to_fp16 = const()[name = string("blocks_29_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489463232)))];
+            tensor<fp16, [1, ?, 1280]> var_6471_cast_fp16 = layer_norm(axes = var_6471_axes_0, beta = blocks_29_mlp_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_mlp_ln_weight_to_fp16, x = x_537_cast_fp16)[name = string("op_6471_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6480_to_fp16 = const()[name = string("op_6480_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489465856)))];
+            tensor<fp16, [5120]> var_6481_to_fp16 = const()[name = string("op_6481_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502573120)))];
+            tensor<fp16, [1, ?, 5120]> linear_238_cast_fp16 = linear(bias = var_6481_to_fp16, weight = var_6480_to_fp16, x = var_6471_cast_fp16)[name = string("linear_238_cast_fp16")];
+            string x_541_mode_0 = const()[name = string("x_541_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_541_cast_fp16 = gelu(mode = x_541_mode_0, x = linear_238_cast_fp16)[name = string("x_541_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502583424)))];
+            tensor<fp16, [1280]> var_6487_to_fp16 = const()[name = string("op_6487_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515690688)))];
+            tensor<fp16, [1, ?, 1280]> linear_239_cast_fp16 = linear(bias = var_6487_to_fp16, weight = var_6486_to_fp16, x = x_541_cast_fp16)[name = string("linear_239_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_543_cast_fp16 = add(x = x_537_cast_fp16, y = linear_239_cast_fp16)[name = string("x_543_cast_fp16")];
+            tensor<int32, [4]> k_cache_121_begin_0 = const()[name = string("k_cache_121_begin_0"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_121_end_0 = const()[name = string("k_cache_121_end_0"), val = tensor<int32, [4]>([31, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_121_end_mask_0 = const()[name = string("k_cache_121_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_121_squeeze_mask_0 = const()[name = string("k_cache_121_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_121_cast_fp16 = slice_by_index(begin = k_cache_121_begin_0, end = k_cache_121_end_0, end_mask = k_cache_121_end_mask_0, squeeze_mask = k_cache_121_squeeze_mask_0, x = coreml_update_state_122)[name = string("k_cache_121_cast_fp16")];
+            tensor<int32, [4]> v_cache_121_begin_0 = const()[name = string("v_cache_121_begin_0"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_121_end_0 = const()[name = string("v_cache_121_end_0"), val = tensor<int32, [4]>([31, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_121_end_mask_0 = const()[name = string("v_cache_121_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_121_squeeze_mask_0 = const()[name = string("v_cache_121_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_121_cast_fp16 = slice_by_index(begin = v_cache_121_begin_0, end = v_cache_121_end_0, end_mask = v_cache_121_end_mask_0, squeeze_mask = v_cache_121_squeeze_mask_0, x = coreml_update_state_123)[name = string("v_cache_121_cast_fp16")];
+            tensor<int32, [4]> k_cache_123_begin_0 = const()[name = string("k_cache_123_begin_0"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_123_end_0 = const()[name = string("k_cache_123_end_0"), val = tensor<int32, [4]>([31, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_123_end_mask_0 = const()[name = string("k_cache_123_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_123_squeeze_mask_0 = const()[name = string("k_cache_123_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_123_cast_fp16 = slice_by_index(begin = k_cache_123_begin_0, end = k_cache_123_end_0, end_mask = k_cache_123_end_mask_0, squeeze_mask = k_cache_123_squeeze_mask_0, x = read_state_2)[name = string("k_cache_123_cast_fp16")];
+            tensor<int32, [4]> v_cache_123_begin_0 = const()[name = string("v_cache_123_begin_0"), val = tensor<int32, [4]>([30, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_123_end_0 = const()[name = string("v_cache_123_end_0"), val = tensor<int32, [4]>([31, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_123_end_mask_0 = const()[name = string("v_cache_123_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_123_squeeze_mask_0 = const()[name = string("v_cache_123_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_123_cast_fp16 = slice_by_index(begin = v_cache_123_begin_0, end = v_cache_123_end_0, end_mask = v_cache_123_end_mask_0, squeeze_mask = v_cache_123_squeeze_mask_0, x = read_state_3)[name = string("v_cache_123_cast_fp16")];
+            int32 var_6510 = const()[name = string("op_6510"), val = int32(-1)];
+            tensor<int32, [1]> var_6528_axes_0 = const()[name = string("op_6528_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515693312)))];
+            tensor<fp16, [1280]> blocks_30_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515695936)))];
+            fp16 var_6516_to_fp16 = const()[name = string("op_6516_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6528_cast_fp16 = layer_norm(axes = var_6528_axes_0, beta = blocks_30_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_attn_ln_weight_to_fp16, x = x_543_cast_fp16)[name = string("op_6528_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6539_to_fp16 = const()[name = string("op_6539_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515698560)))];
+            tensor<fp16, [1280]> var_6540_to_fp16 = const()[name = string("op_6540_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518975424)))];
+            tensor<fp16, [1, ?, 1280]> linear_240_cast_fp16 = linear(bias = var_6540_to_fp16, weight = var_6539_to_fp16, x = var_6528_cast_fp16)[name = string("linear_240_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6543_to_fp16 = const()[name = string("op_6543_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518978048)))];
+            tensor<fp16, [1, ?, 1280]> linear_241_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6543_to_fp16, x = var_6528_cast_fp16)[name = string("linear_241_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6547_to_fp16 = const()[name = string("op_6547_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1522254912)))];
+            tensor<fp16, [1280]> var_6548_to_fp16 = const()[name = string("op_6548_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525531776)))];
+            tensor<fp16, [1, ?, 1280]> linear_242_cast_fp16 = linear(bias = var_6548_to_fp16, weight = var_6547_to_fp16, x = var_6528_cast_fp16)[name = string("linear_242_cast_fp16")];
+            tensor<int32, [3]> var_6550_shape_cast_fp16 = shape(x = linear_240_cast_fp16)[name = string("op_6550_shape_cast_fp16")];
+            int32 gather_362_axis_0 = const()[name = string("gather_362_axis_0"), val = int32(0)];
+            int32 gather_362_batch_dims_0 = const()[name = string("gather_362_batch_dims_0"), val = int32(0)];
+            bool gather_362_validate_indices_0 = const()[name = string("gather_362_validate_indices_0"), val = bool(false)];
+            string var_6550_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6550_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_362_to_uint16 = const()[name = string("select_362_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_6550_shape_cast_fp16_to_uint16 = cast(dtype = var_6550_shape_cast_fp16_to_uint16_dtype_0, x = var_6550_shape_cast_fp16)[name = string("cast_330")];
+            uint16 gather_362_cast_uint16 = gather(axis = gather_362_axis_0, batch_dims = gather_362_batch_dims_0, indices = select_362_to_uint16, validate_indices = gather_362_validate_indices_0, x = var_6550_shape_cast_fp16_to_uint16)[name = string("gather_362_cast_uint16")];
+            string gather_362_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_362_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_362_cast_uint16_to_int32 = cast(dtype = gather_362_cast_uint16_to_int32_dtype_0, x = gather_362_cast_uint16)[name = string("cast_329")];
+            int32 end_step_63 = add(x = offset, y = gather_362_cast_uint16_to_int32)[name = string("end_step_63")];
+            tensor<int32, [1]> expand_dims_480 = const()[name = string("expand_dims_480"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_482 = const()[name = string("expand_dims_482"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_483_axes_0 = const()[name = string("expand_dims_483_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_483 = expand_dims(axes = expand_dims_483_axes_0, x = end_step_63)[name = string("expand_dims_483")];
+            tensor<int32, [1]> concat_664_values0_0 = const()[name = string("concat_664_values0_0"), val = tensor<int32, [1]>([30])];
+            int32 concat_664_axis_0 = const()[name = string("concat_664_axis_0"), val = int32(0)];
+            bool concat_664_interleave_0 = const()[name = string("concat_664_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_664 = concat(axis = concat_664_axis_0, interleave = concat_664_interleave_0, values = (concat_664_values0_0, expand_dims_480, expand_dims_1, expand_dims_482))[name = string("concat_664")];
+            tensor<int32, [1]> concat_665_values0_0 = const()[name = string("concat_665_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_665_values1_0 = const()[name = string("concat_665_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_665_values3_0 = const()[name = string("concat_665_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_665_axis_0 = const()[name = string("concat_665_axis_0"), val = int32(0)];
+            bool concat_665_interleave_0 = const()[name = string("concat_665_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_665 = concat(axis = concat_665_axis_0, interleave = concat_665_interleave_0, values = (concat_665_values0_0, concat_665_values1_0, expand_dims_483, concat_665_values3_0))[name = string("concat_665")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_31_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = k_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = k_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_31_stride_0, update = linear_241_cast_fp16, x = coreml_update_state_122)[name = string("k_cache1_internal_tensor_assign_31_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_31_cast_fp16, input = k_cache1)[name = string("coreml_update_state_124_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_124 = read_state(input = k_cache1)[name = string("coreml_update_state_124")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_31_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = v_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = v_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_31_stride_0, update = linear_242_cast_fp16, x = coreml_update_state_123)[name = string("v_cache1_internal_tensor_assign_31_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_31_cast_fp16, input = v_cache1)[name = string("coreml_update_state_125_write_state")];
+            tensor<fp16, [32, 1, 448, 1280]> coreml_update_state_125 = read_state(input = v_cache1)[name = string("coreml_update_state_125")];
+            int32 concat_670_values0_0 = const()[name = string("concat_670_values0_0"), val = int32(1)];
+            int32 concat_670_values2_0 = const()[name = string("concat_670_values2_0"), val = int32(1280)];
+            int32 concat_670_axis_0 = const()[name = string("concat_670_axis_0"), val = int32(0)];
+            bool concat_670_interleave_0 = const()[name = string("concat_670_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_670 = concat(axis = concat_670_axis_0, interleave = concat_670_interleave_0, values = (concat_670_values0_0, end_step_63, concat_670_values2_0))[name = string("concat_670")];
+            tensor<int32, [3]> var_6566_begin_0 = const()[name = string("op_6566_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6566_end_mask_0 = const()[name = string("op_6566_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6566_cast_fp16 = slice_by_index(begin = var_6566_begin_0, end = concat_670, end_mask = var_6566_end_mask_0, x = k_cache_121_cast_fp16)[name = string("op_6566_cast_fp16")];
+            tensor<int32, [3]> var_6569_begin_0 = const()[name = string("op_6569_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6569_end_mask_0 = const()[name = string("op_6569_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6569_cast_fp16 = slice_by_index(begin = var_6569_begin_0, end = concat_670, end_mask = var_6569_end_mask_0, x = v_cache_121_cast_fp16)[name = string("op_6569_cast_fp16")];
+            tensor<int32, [4]> concat_672x = const()[name = string("concat_672x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6579_cast_fp16 = reshape(shape = concat_672x, x = linear_240_cast_fp16)[name = string("op_6579_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_243_cast_fp16 = mul(x = var_6579_cast_fp16, y = const_280_to_fp16)[name = string("q_243_cast_fp16")];
+            tensor<int32, [4]> concat_673x = const()[name = string("concat_673x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6586_cast_fp16 = reshape(shape = concat_673x, x = var_6566_cast_fp16)[name = string("op_6586_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_305_cast_fp16 = mul(x = var_6586_cast_fp16, y = const_281_to_fp16)[name = string("k_305_cast_fp16")];
+            tensor<int32, [4]> concat_674x = const()[name = string("concat_674x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6593_cast_fp16 = reshape(shape = concat_674x, x = var_6569_cast_fp16)[name = string("op_6593_cast_fp16")];
+            tensor<int32, [4]> var_6594 = const()[name = string("op_6594"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_181_transpose_x_0 = const()[name = string("qk_181_transpose_x_0"), val = bool(false)];
+            bool qk_181_transpose_y_0 = const()[name = string("qk_181_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_377_perm_0 = const()[name = string("transpose_377_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_378_perm_0 = const()[name = string("transpose_378_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_378 = transpose(perm = transpose_378_perm_0, x = k_305_cast_fp16)[name = string("transpose_398")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_377 = transpose(perm = transpose_377_perm_0, x = q_243_cast_fp16)[name = string("transpose_399")];
+            tensor<fp16, [1, 20, ?, ?]> qk_181_cast_fp16 = matmul(transpose_x = qk_181_transpose_x_0, transpose_y = qk_181_transpose_y_0, x = transpose_377, y = transpose_378)[name = string("qk_181_cast_fp16")];
+            int32 concat_675_values1_0 = const()[name = string("concat_675_values1_0"), val = int32(448)];
+            int32 concat_675_axis_0 = const()[name = string("concat_675_axis_0"), val = int32(0)];
+            bool concat_675_interleave_0 = const()[name = string("concat_675_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_675 = concat(axis = concat_675_axis_0, interleave = concat_675_interleave_0, values = (gather_362_cast_uint16_to_int32, concat_675_values1_0))[name = string("concat_675")];
+            tensor<int32, [2]> var_6597_begin_0 = const()[name = string("op_6597_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6597_end_mask_0 = const()[name = string("op_6597_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_6597_cast_fp16 = slice_by_index(begin = var_6597_begin_0, end = concat_675, end_mask = var_6597_end_mask_0, x = mask_to_fp16)[name = string("op_6597_cast_fp16")];
+            int32 concat_676_values0_0 = const()[name = string("concat_676_values0_0"), val = int32(0)];
+            int32 concat_676_axis_0 = const()[name = string("concat_676_axis_0"), val = int32(0)];
+            bool concat_676_interleave_0 = const()[name = string("concat_676_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_676 = concat(axis = concat_676_axis_0, interleave = concat_676_interleave_0, values = (concat_676_values0_0, gather_362_cast_uint16_to_int32))[name = string("concat_676")];
+            tensor<int32, [2]> var_6598_begin_0 = const()[name = string("op_6598_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6598_end_mask_0 = const()[name = string("op_6598_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_6598_cast_fp16 = slice_by_index(begin = var_6598_begin_0, end = concat_676, end_mask = var_6598_end_mask_0, x = var_6597_cast_fp16)[name = string("op_6598_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_183_cast_fp16 = add(x = qk_181_cast_fp16, y = var_6598_cast_fp16)[name = string("qk_183_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_6601_cast_fp16 = softmax(axis = var_6510, x = qk_183_cast_fp16)[name = string("op_6601_cast_fp16")];
+            bool var_6603_transpose_x_0 = const()[name = string("op_6603_transpose_x_0"), val = bool(false)];
+            bool var_6603_transpose_y_0 = const()[name = string("op_6603_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_305_cast_fp16 = transpose(perm = var_6594, x = var_6593_cast_fp16)[name = string("transpose_400")];
+            tensor<fp16, [1, 20, ?, 64]> var_6603_cast_fp16 = matmul(transpose_x = var_6603_transpose_x_0, transpose_y = var_6603_transpose_y_0, x = var_6601_cast_fp16, y = v_305_cast_fp16)[name = string("op_6603_cast_fp16")];
+            tensor<int32, [4]> var_6604 = const()[name = string("op_6604"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_677x = const()[name = string("concat_677x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6605_cast_fp16 = transpose(perm = var_6604, x = var_6603_cast_fp16)[name = string("transpose_397")];
+            tensor<fp16, [1, ?, 1280]> x_547_cast_fp16 = reshape(shape = concat_677x, x = var_6605_cast_fp16)[name = string("x_547_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6609_to_fp16 = const()[name = string("op_6609_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525534400)))];
+            tensor<fp16, [1280]> var_6610_to_fp16 = const()[name = string("op_6610_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528811264)))];
+            tensor<fp16, [1, ?, 1280]> linear_243_cast_fp16 = linear(bias = var_6610_to_fp16, weight = var_6609_to_fp16, x = x_547_cast_fp16)[name = string("linear_243_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_549_cast_fp16 = add(x = x_543_cast_fp16, y = linear_243_cast_fp16)[name = string("x_549_cast_fp16")];
+            tensor<int32, [1]> var_6617_axes_0 = const()[name = string("op_6617_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528813888)))];
+            tensor<fp16, [1280]> blocks_30_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528816512)))];
+            tensor<fp16, [1, ?, 1280]> var_6617_cast_fp16 = layer_norm(axes = var_6617_axes_0, beta = blocks_30_cross_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_cross_attn_ln_weight_to_fp16, x = x_549_cast_fp16)[name = string("op_6617_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6626_to_fp16 = const()[name = string("op_6626_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528819136)))];
+            tensor<fp16, [1280]> var_6627_to_fp16 = const()[name = string("op_6627_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532096000)))];
+            tensor<fp16, [1, ?, 1280]> linear_244_cast_fp16 = linear(bias = var_6627_to_fp16, weight = var_6626_to_fp16, x = var_6617_cast_fp16)[name = string("linear_244_cast_fp16")];
+            tensor<int32, [3]> concat_678 = const()[name = string("concat_678"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_679 = const()[name = string("concat_679"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_307_internal_tensor_assign_1_stride_0 = const()[name = string("k_307_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_678, begin_mask = k_307_internal_tensor_assign_1_begin_mask_0, end = concat_679, end_mask = k_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_307_internal_tensor_assign_1_squeeze_mask_0, stride = k_307_internal_tensor_assign_1_stride_0, update = k_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("k_307_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_680 = const()[name = string("concat_680"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_681 = const()[name = string("concat_681"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_307_internal_tensor_assign_1_stride_0 = const()[name = string("v_307_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_680, begin_mask = v_307_internal_tensor_assign_1_begin_mask_0, end = concat_681, end_mask = v_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_307_internal_tensor_assign_1_squeeze_mask_0, stride = v_307_internal_tensor_assign_1_stride_0, update = v_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("v_307_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_682x = const()[name = string("concat_682x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6647_cast_fp16 = reshape(shape = concat_682x, x = linear_244_cast_fp16)[name = string("op_6647_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_247_cast_fp16 = mul(x = var_6647_cast_fp16, y = const_282_to_fp16)[name = string("q_247_cast_fp16")];
+            tensor<int32, [4]> var_6653 = const()[name = string("op_6653"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6654_cast_fp16 = reshape(shape = var_6653, x = k_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6654_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_309_cast_fp16 = mul(x = var_6654_cast_fp16, y = const_283_to_fp16)[name = string("k_309_cast_fp16")];
+            tensor<int32, [4]> var_6660 = const()[name = string("op_6660"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6661_cast_fp16 = reshape(shape = var_6660, x = v_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6661_cast_fp16")];
+            tensor<int32, [4]> var_6662 = const()[name = string("op_6662"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_185_transpose_x_0 = const()[name = string("qk_185_transpose_x_0"), val = bool(false)];
+            bool qk_185_transpose_y_0 = const()[name = string("qk_185_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_379_perm_0 = const()[name = string("transpose_379_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_380_perm_0 = const()[name = string("transpose_380_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_380 = transpose(perm = transpose_380_perm_0, x = k_309_cast_fp16)[name = string("transpose_394")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_379 = transpose(perm = transpose_379_perm_0, x = q_247_cast_fp16)[name = string("transpose_395")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_185_cast_fp16 = matmul(transpose_x = qk_185_transpose_x_0, transpose_y = qk_185_transpose_y_0, x = transpose_379, y = transpose_380)[name = string("qk_185_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6666_cast_fp16 = softmax(axis = var_6510, x = qk_185_cast_fp16)[name = string("op_6666_cast_fp16")];
+            bool var_6668_transpose_x_0 = const()[name = string("op_6668_transpose_x_0"), val = bool(false)];
+            bool var_6668_transpose_y_0 = const()[name = string("op_6668_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_309_cast_fp16 = transpose(perm = var_6662, x = var_6661_cast_fp16)[name = string("transpose_396")];
+            tensor<fp16, [1, 20, ?, 64]> var_6668_cast_fp16 = matmul(transpose_x = var_6668_transpose_x_0, transpose_y = var_6668_transpose_y_0, x = var_6666_cast_fp16, y = v_309_cast_fp16)[name = string("op_6668_cast_fp16")];
+            tensor<int32, [4]> var_6669 = const()[name = string("op_6669"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_683x = const()[name = string("concat_683x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6670_cast_fp16 = transpose(perm = var_6669, x = var_6668_cast_fp16)[name = string("transpose_393")];
+            tensor<fp16, [1, ?, 1280]> x_553_cast_fp16 = reshape(shape = concat_683x, x = var_6670_cast_fp16)[name = string("x_553_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6674_to_fp16 = const()[name = string("op_6674_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532098624)))];
+            tensor<fp16, [1280]> var_6675_to_fp16 = const()[name = string("op_6675_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535375488)))];
+            tensor<fp16, [1, ?, 1280]> linear_245_cast_fp16 = linear(bias = var_6675_to_fp16, weight = var_6674_to_fp16, x = x_553_cast_fp16)[name = string("linear_245_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_555_cast_fp16 = add(x = x_549_cast_fp16, y = linear_245_cast_fp16)[name = string("x_555_cast_fp16")];
+            tensor<int32, [1]> var_6682_axes_0 = const()[name = string("op_6682_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_mlp_ln_weight_to_fp16 = const()[name = string("blocks_30_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535378112)))];
+            tensor<fp16, [1280]> blocks_30_mlp_ln_bias_to_fp16 = const()[name = string("blocks_30_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535380736)))];
+            tensor<fp16, [1, ?, 1280]> var_6682_cast_fp16 = layer_norm(axes = var_6682_axes_0, beta = blocks_30_mlp_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_mlp_ln_weight_to_fp16, x = x_555_cast_fp16)[name = string("op_6682_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6691_to_fp16 = const()[name = string("op_6691_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535383360)))];
+            tensor<fp16, [5120]> var_6692_to_fp16 = const()[name = string("op_6692_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548490624)))];
+            tensor<fp16, [1, ?, 5120]> linear_246_cast_fp16 = linear(bias = var_6692_to_fp16, weight = var_6691_to_fp16, x = var_6682_cast_fp16)[name = string("linear_246_cast_fp16")];
+            string x_559_mode_0 = const()[name = string("x_559_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_559_cast_fp16 = gelu(mode = x_559_mode_0, x = linear_246_cast_fp16)[name = string("x_559_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6697_to_fp16 = const()[name = string("op_6697_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548500928)))];
+            tensor<fp16, [1280]> var_6698_to_fp16 = const()[name = string("op_6698_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561608192)))];
+            tensor<fp16, [1, ?, 1280]> linear_247_cast_fp16 = linear(bias = var_6698_to_fp16, weight = var_6697_to_fp16, x = x_559_cast_fp16)[name = string("linear_247_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_561_cast_fp16 = add(x = x_555_cast_fp16, y = linear_247_cast_fp16)[name = string("x_561_cast_fp16")];
+            tensor<int32, [4]> k_cache_125_begin_0 = const()[name = string("k_cache_125_begin_0"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_125_end_0 = const()[name = string("k_cache_125_end_0"), val = tensor<int32, [4]>([32, 1, 448, 1280])];
+            tensor<bool, [4]> k_cache_125_end_mask_0 = const()[name = string("k_cache_125_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_125_squeeze_mask_0 = const()[name = string("k_cache_125_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> k_cache_125_cast_fp16 = slice_by_index(begin = k_cache_125_begin_0, end = k_cache_125_end_0, end_mask = k_cache_125_end_mask_0, squeeze_mask = k_cache_125_squeeze_mask_0, x = coreml_update_state_124)[name = string("k_cache_125_cast_fp16")];
+            tensor<int32, [4]> v_cache_125_begin_0 = const()[name = string("v_cache_125_begin_0"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_125_end_0 = const()[name = string("v_cache_125_end_0"), val = tensor<int32, [4]>([32, 1, 448, 1280])];
+            tensor<bool, [4]> v_cache_125_end_mask_0 = const()[name = string("v_cache_125_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_125_squeeze_mask_0 = const()[name = string("v_cache_125_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1280]> v_cache_125_cast_fp16 = slice_by_index(begin = v_cache_125_begin_0, end = v_cache_125_end_0, end_mask = v_cache_125_end_mask_0, squeeze_mask = v_cache_125_squeeze_mask_0, x = coreml_update_state_125)[name = string("v_cache_125_cast_fp16")];
+            tensor<int32, [4]> k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor<int32, [4]>([32, 1, 1500, 1280])];
+            tensor<bool, [4]> k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")];
+            tensor<int32, [4]> v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor<int32, [4]>([31, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor<int32, [4]>([32, 1, 1500, 1280])];
+            tensor<bool, [4]> v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")];
+            int32 var_6721 = const()[name = string("op_6721"), val = int32(-1)];
+            tensor<int32, [1]> var_6739_axes_0 = const()[name = string("op_6739_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561610816)))];
+            tensor<fp16, [1280]> blocks_31_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561613440)))];
+            fp16 var_6727_to_fp16 = const()[name = string("op_6727_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6739_cast_fp16 = layer_norm(axes = var_6739_axes_0, beta = blocks_31_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_attn_ln_weight_to_fp16, x = x_561_cast_fp16)[name = string("op_6739_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6750_to_fp16 = const()[name = string("op_6750_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561616064)))];
+            tensor<fp16, [1280]> var_6751_to_fp16 = const()[name = string("op_6751_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564892928)))];
+            tensor<fp16, [1, ?, 1280]> linear_248_cast_fp16 = linear(bias = var_6751_to_fp16, weight = var_6750_to_fp16, x = var_6739_cast_fp16)[name = string("linear_248_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6754_to_fp16 = const()[name = string("op_6754_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564895552)))];
+            tensor<fp16, [1, ?, 1280]> linear_249_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6754_to_fp16, x = var_6739_cast_fp16)[name = string("linear_249_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6758_to_fp16 = const()[name = string("op_6758_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1568172416)))];
+            tensor<fp16, [1280]> var_6759_to_fp16 = const()[name = string("op_6759_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571449280)))];
+            tensor<fp16, [1, ?, 1280]> linear_250_cast_fp16 = linear(bias = var_6759_to_fp16, weight = var_6758_to_fp16, x = var_6739_cast_fp16)[name = string("linear_250_cast_fp16")];
+            tensor<int32, [3]> var_6761_shape_cast_fp16 = shape(x = linear_248_cast_fp16)[name = string("op_6761_shape_cast_fp16")];
+            int32 gather_374_axis_0 = const()[name = string("gather_374_axis_0"), val = int32(0)];
+            int32 gather_374_batch_dims_0 = const()[name = string("gather_374_batch_dims_0"), val = int32(0)];
+            bool gather_374_validate_indices_0 = const()[name = string("gather_374_validate_indices_0"), val = bool(false)];
+            string var_6761_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6761_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_374_to_uint16 = const()[name = string("select_374_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_6761_shape_cast_fp16_to_uint16 = cast(dtype = var_6761_shape_cast_fp16_to_uint16_dtype_0, x = var_6761_shape_cast_fp16)[name = string("cast_328")];
+            uint16 gather_374_cast_uint16 = gather(axis = gather_374_axis_0, batch_dims = gather_374_batch_dims_0, indices = select_374_to_uint16, validate_indices = gather_374_validate_indices_0, x = var_6761_shape_cast_fp16_to_uint16)[name = string("gather_374_cast_uint16")];
+            string gather_374_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_374_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_374_cast_uint16_to_int32 = cast(dtype = gather_374_cast_uint16_to_int32_dtype_0, x = gather_374_cast_uint16)[name = string("cast_327")];
+            int32 end_step = add(x = offset, y = gather_374_cast_uint16_to_int32)[name = string("end_step")];
+            tensor<int32, [1]> expand_dims_496 = const()[name = string("expand_dims_496"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_498 = const()[name = string("expand_dims_498"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_499_axes_0 = const()[name = string("expand_dims_499_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_499 = expand_dims(axes = expand_dims_499_axes_0, x = end_step)[name = string("expand_dims_499")];
+            tensor<int32, [1]> concat_686_values0_0 = const()[name = string("concat_686_values0_0"), val = tensor<int32, [1]>([31])];
+            int32 concat_686_axis_0 = const()[name = string("concat_686_axis_0"), val = int32(0)];
+            bool concat_686_interleave_0 = const()[name = string("concat_686_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_686 = concat(axis = concat_686_axis_0, interleave = concat_686_interleave_0, values = (concat_686_values0_0, expand_dims_496, expand_dims_1, expand_dims_498))[name = string("concat_686")];
+            tensor<int32, [1]> concat_687_values0_0 = const()[name = string("concat_687_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_687_values1_0 = const()[name = string("concat_687_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_687_values3_0 = const()[name = string("concat_687_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_687_axis_0 = const()[name = string("concat_687_axis_0"), val = int32(0)];
+            bool concat_687_interleave_0 = const()[name = string("concat_687_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_687 = concat(axis = concat_687_axis_0, interleave = concat_687_interleave_0, values = (concat_687_values0_0, concat_687_values1_0, expand_dims_499, concat_687_values3_0))[name = string("concat_687")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_32_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> k_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = k_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = k_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_32_stride_0, update = linear_249_cast_fp16, x = coreml_update_state_124)[name = string("k_cache1_internal_tensor_assign_32_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_32_cast_fp16, input = k_cache1)[name = string("coreml_update_state_126_write_state")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_32_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [32, 1, 448, 1280]> v_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = v_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = v_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_32_stride_0, update = linear_250_cast_fp16, x = coreml_update_state_125)[name = string("v_cache1_internal_tensor_assign_32_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_32_cast_fp16, input = v_cache1)[name = string("coreml_update_state_127_write_state")];
+            int32 concat_692_values0_0 = const()[name = string("concat_692_values0_0"), val = int32(1)];
+            int32 concat_692_values2_0 = const()[name = string("concat_692_values2_0"), val = int32(1280)];
+            int32 concat_692_axis_0 = const()[name = string("concat_692_axis_0"), val = int32(0)];
+            bool concat_692_interleave_0 = const()[name = string("concat_692_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_692 = concat(axis = concat_692_axis_0, interleave = concat_692_interleave_0, values = (concat_692_values0_0, end_step, concat_692_values2_0))[name = string("concat_692")];
+            tensor<int32, [3]> var_6777_begin_0 = const()[name = string("op_6777_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6777_end_mask_0 = const()[name = string("op_6777_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6777_cast_fp16 = slice_by_index(begin = var_6777_begin_0, end = concat_692, end_mask = var_6777_end_mask_0, x = k_cache_125_cast_fp16)[name = string("op_6777_cast_fp16")];
+            tensor<int32, [3]> var_6780_begin_0 = const()[name = string("op_6780_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_6780_end_mask_0 = const()[name = string("op_6780_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1280]> var_6780_cast_fp16 = slice_by_index(begin = var_6780_begin_0, end = concat_692, end_mask = var_6780_end_mask_0, x = v_cache_125_cast_fp16)[name = string("op_6780_cast_fp16")];
+            tensor<int32, [4]> concat_694x = const()[name = string("concat_694x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6790_cast_fp16 = reshape(shape = concat_694x, x = linear_248_cast_fp16)[name = string("op_6790_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_284_to_fp16 = const()[name = string("const_284_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_251_cast_fp16 = mul(x = var_6790_cast_fp16, y = const_284_to_fp16)[name = string("q_251_cast_fp16")];
+            tensor<int32, [4]> concat_695x = const()[name = string("concat_695x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6797_cast_fp16 = reshape(shape = concat_695x, x = var_6777_cast_fp16)[name = string("op_6797_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> k_315_cast_fp16 = mul(x = var_6797_cast_fp16, y = const_285_to_fp16)[name = string("k_315_cast_fp16")];
+            tensor<int32, [4]> concat_696x = const()[name = string("concat_696x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6804_cast_fp16 = reshape(shape = concat_696x, x = var_6780_cast_fp16)[name = string("op_6804_cast_fp16")];
+            tensor<int32, [4]> var_6805 = const()[name = string("op_6805"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_187_transpose_x_0 = const()[name = string("qk_187_transpose_x_0"), val = bool(false)];
+            bool qk_187_transpose_y_0 = const()[name = string("qk_187_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_381_perm_0 = const()[name = string("transpose_381_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_382_perm_0 = const()[name = string("transpose_382_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, ?]> transpose_382 = transpose(perm = transpose_382_perm_0, x = k_315_cast_fp16)[name = string("transpose_390")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_381 = transpose(perm = transpose_381_perm_0, x = q_251_cast_fp16)[name = string("transpose_391")];
+            tensor<fp16, [1, 20, ?, ?]> qk_187_cast_fp16 = matmul(transpose_x = qk_187_transpose_x_0, transpose_y = qk_187_transpose_y_0, x = transpose_381, y = transpose_382)[name = string("qk_187_cast_fp16")];
+            int32 concat_697_values1_0 = const()[name = string("concat_697_values1_0"), val = int32(448)];
+            int32 concat_697_axis_0 = const()[name = string("concat_697_axis_0"), val = int32(0)];
+            bool concat_697_interleave_0 = const()[name = string("concat_697_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_697 = concat(axis = concat_697_axis_0, interleave = concat_697_interleave_0, values = (gather_374_cast_uint16_to_int32, concat_697_values1_0))[name = string("concat_697")];
+            tensor<int32, [2]> var_6808_begin_0 = const()[name = string("op_6808_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6808_end_mask_0 = const()[name = string("op_6808_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_6808_cast_fp16 = slice_by_index(begin = var_6808_begin_0, end = concat_697, end_mask = var_6808_end_mask_0, x = mask_to_fp16)[name = string("op_6808_cast_fp16")];
+            int32 concat_698_values0_0 = const()[name = string("concat_698_values0_0"), val = int32(0)];
+            int32 concat_698_axis_0 = const()[name = string("concat_698_axis_0"), val = int32(0)];
+            bool concat_698_interleave_0 = const()[name = string("concat_698_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_698 = concat(axis = concat_698_axis_0, interleave = concat_698_interleave_0, values = (concat_698_values0_0, gather_374_cast_uint16_to_int32))[name = string("concat_698")];
+            tensor<int32, [2]> var_6809_begin_0 = const()[name = string("op_6809_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_6809_end_mask_0 = const()[name = string("op_6809_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_6809_cast_fp16 = slice_by_index(begin = var_6809_begin_0, end = concat_698, end_mask = var_6809_end_mask_0, x = var_6808_cast_fp16)[name = string("op_6809_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> qk_189_cast_fp16 = add(x = qk_187_cast_fp16, y = var_6809_cast_fp16)[name = string("qk_189_cast_fp16")];
+            tensor<fp16, [1, 20, ?, ?]> var_6812_cast_fp16 = softmax(axis = var_6721, x = qk_189_cast_fp16)[name = string("op_6812_cast_fp16")];
+            bool var_6814_transpose_x_0 = const()[name = string("op_6814_transpose_x_0"), val = bool(false)];
+            bool var_6814_transpose_y_0 = const()[name = string("op_6814_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, ?, 64]> v_315_cast_fp16 = transpose(perm = var_6805, x = var_6804_cast_fp16)[name = string("transpose_392")];
+            tensor<fp16, [1, 20, ?, 64]> var_6814_cast_fp16 = matmul(transpose_x = var_6814_transpose_x_0, transpose_y = var_6814_transpose_y_0, x = var_6812_cast_fp16, y = v_315_cast_fp16)[name = string("op_6814_cast_fp16")];
+            tensor<int32, [4]> var_6815 = const()[name = string("op_6815"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_699x = const()[name = string("concat_699x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6816_cast_fp16 = transpose(perm = var_6815, x = var_6814_cast_fp16)[name = string("transpose_389")];
+            tensor<fp16, [1, ?, 1280]> x_565_cast_fp16 = reshape(shape = concat_699x, x = var_6816_cast_fp16)[name = string("x_565_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6820_to_fp16 = const()[name = string("op_6820_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571451904)))];
+            tensor<fp16, [1280]> var_6821_to_fp16 = const()[name = string("op_6821_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574728768)))];
+            tensor<fp16, [1, ?, 1280]> linear_251_cast_fp16 = linear(bias = var_6821_to_fp16, weight = var_6820_to_fp16, x = x_565_cast_fp16)[name = string("linear_251_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_567_cast_fp16 = add(x = x_561_cast_fp16, y = linear_251_cast_fp16)[name = string("x_567_cast_fp16")];
+            tensor<int32, [1]> var_6828_axes_0 = const()[name = string("op_6828_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574731392)))];
+            tensor<fp16, [1280]> blocks_31_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574734016)))];
+            tensor<fp16, [1, ?, 1280]> var_6828_cast_fp16 = layer_norm(axes = var_6828_axes_0, beta = blocks_31_cross_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_cross_attn_ln_weight_to_fp16, x = x_567_cast_fp16)[name = string("op_6828_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6837_to_fp16 = const()[name = string("op_6837_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574736640)))];
+            tensor<fp16, [1280]> var_6838_to_fp16 = const()[name = string("op_6838_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578013504)))];
+            tensor<fp16, [1, ?, 1280]> linear_252_cast_fp16 = linear(bias = var_6838_to_fp16, weight = var_6837_to_fp16, x = var_6828_cast_fp16)[name = string("linear_252_cast_fp16")];
+            tensor<int32, [3]> concat_700 = const()[name = string("concat_700"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_701 = const()[name = string("concat_701"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_317_internal_tensor_assign_1_stride_0 = const()[name = string("k_317_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> k_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_700, begin_mask = k_317_internal_tensor_assign_1_begin_mask_0, end = concat_701, end_mask = k_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_317_internal_tensor_assign_1_squeeze_mask_0, stride = k_317_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_317_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_702 = const()[name = string("concat_702"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_703 = const()[name = string("concat_703"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_317_internal_tensor_assign_1_stride_0 = const()[name = string("v_317_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1280]> v_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_702, begin_mask = v_317_internal_tensor_assign_1_begin_mask_0, end = concat_703, end_mask = v_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_317_internal_tensor_assign_1_squeeze_mask_0, stride = v_317_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_317_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_704x = const()[name = string("concat_704x"), val = tensor<int32, [4]>([1, -1, 20, 64])];
+            tensor<fp16, [1, ?, 20, 64]> var_6858_cast_fp16 = reshape(shape = concat_704x, x = linear_252_cast_fp16)[name = string("op_6858_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_286_to_fp16 = const()[name = string("const_286_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 20, 64]> q_cast_fp16 = mul(x = var_6858_cast_fp16, y = const_286_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_6864 = const()[name = string("op_6864"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6865_cast_fp16 = reshape(shape = var_6864, x = k_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6865_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_cast_fp16 = mul(x = var_6865_cast_fp16, y = const_287_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_6871 = const()[name = string("op_6871"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_6872_cast_fp16 = reshape(shape = var_6871, x = v_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6872_cast_fp16")];
+            tensor<int32, [4]> var_6873 = const()[name = string("op_6873"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_383_perm_0 = const()[name = string("transpose_383_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_384_perm_0 = const()[name = string("transpose_384_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_384 = transpose(perm = transpose_384_perm_0, x = k_cast_fp16)[name = string("transpose_386")];
+            tensor<fp16, [1, 20, ?, 64]> transpose_383 = transpose(perm = transpose_383_perm_0, x = q_cast_fp16)[name = string("transpose_387")];
+            tensor<fp16, [1, 20, ?, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_383, y = transpose_384)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 20, ?, 1500]> var_6877_cast_fp16 = softmax(axis = var_6721, x = qk_cast_fp16)[name = string("op_6877_cast_fp16")];
+            bool var_6879_transpose_x_0 = const()[name = string("op_6879_transpose_x_0"), val = bool(false)];
+            bool var_6879_transpose_y_0 = const()[name = string("op_6879_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_cast_fp16 = transpose(perm = var_6873, x = var_6872_cast_fp16)[name = string("transpose_388")];
+            tensor<fp16, [1, 20, ?, 64]> var_6879_cast_fp16 = matmul(transpose_x = var_6879_transpose_x_0, transpose_y = var_6879_transpose_y_0, x = var_6877_cast_fp16, y = v_cast_fp16)[name = string("op_6879_cast_fp16")];
+            tensor<int32, [4]> var_6880 = const()[name = string("op_6880"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_705x = const()[name = string("concat_705x"), val = tensor<int32, [3]>([1, -1, 1280])];
+            tensor<fp16, [1, ?, 20, 64]> var_6881_cast_fp16 = transpose(perm = var_6880, x = var_6879_cast_fp16)[name = string("transpose_385")];
+            tensor<fp16, [1, ?, 1280]> x_571_cast_fp16 = reshape(shape = concat_705x, x = var_6881_cast_fp16)[name = string("x_571_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_6885_to_fp16 = const()[name = string("op_6885_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578016128)))];
+            tensor<fp16, [1280]> var_6886_to_fp16 = const()[name = string("op_6886_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581292992)))];
+            tensor<fp16, [1, ?, 1280]> linear_253_cast_fp16 = linear(bias = var_6886_to_fp16, weight = var_6885_to_fp16, x = x_571_cast_fp16)[name = string("linear_253_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_573_cast_fp16 = add(x = x_567_cast_fp16, y = linear_253_cast_fp16)[name = string("x_573_cast_fp16")];
+            tensor<int32, [1]> var_6893_axes_0 = const()[name = string("op_6893_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_mlp_ln_weight_to_fp16 = const()[name = string("blocks_31_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581295616)))];
+            tensor<fp16, [1280]> blocks_31_mlp_ln_bias_to_fp16 = const()[name = string("blocks_31_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581298240)))];
+            tensor<fp16, [1, ?, 1280]> var_6893_cast_fp16 = layer_norm(axes = var_6893_axes_0, beta = blocks_31_mlp_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_mlp_ln_weight_to_fp16, x = x_573_cast_fp16)[name = string("op_6893_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_6902_to_fp16 = const()[name = string("op_6902_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581300864)))];
+            tensor<fp16, [5120]> var_6903_to_fp16 = const()[name = string("op_6903_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594408128)))];
+            tensor<fp16, [1, ?, 5120]> linear_254_cast_fp16 = linear(bias = var_6903_to_fp16, weight = var_6902_to_fp16, x = var_6893_cast_fp16)[name = string("linear_254_cast_fp16")];
+            string x_577_mode_0 = const()[name = string("x_577_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 5120]> x_577_cast_fp16 = gelu(mode = x_577_mode_0, x = linear_254_cast_fp16)[name = string("x_577_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_6908_to_fp16 = const()[name = string("op_6908_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594418432)))];
+            tensor<fp16, [1280]> var_6909_to_fp16 = const()[name = string("op_6909_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607525696)))];
+            tensor<fp16, [1, ?, 1280]> linear_255_cast_fp16 = linear(bias = var_6909_to_fp16, weight = var_6908_to_fp16, x = x_577_cast_fp16)[name = string("linear_255_cast_fp16")];
+            tensor<fp16, [1, ?, 1280]> x_579_cast_fp16 = add(x = x_573_cast_fp16, y = linear_255_cast_fp16)[name = string("x_579_cast_fp16")];
+            tensor<int32, [1]> var_6922_axes_0 = const()[name = string("op_6922_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607528320)))];
+            tensor<fp16, [1280]> ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607530944)))];
+            fp16 var_6913_to_fp16 = const()[name = string("op_6913_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1280]> var_6922_cast_fp16 = layer_norm(axes = var_6922_axes_0, beta = ln_bias_to_fp16, epsilon = var_6913_to_fp16, gamma = ln_weight_to_fp16, x = x_579_cast_fp16)[name = string("op_6922_cast_fp16")];
+            tensor<fp16, [51866]> var_6932_bias_0_to_fp16 = const()[name = string("op_6932_bias_0_to_fp16"), val = tensor<fp16, [51866]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607533568)))];
+            tensor<fp16, [1, ?, 51866]> logits = linear(bias = var_6932_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_6922_cast_fp16)[name = string("op_6932_cast_fp16")];
+        } -> (logits);
+}
\ No newline at end of file
diff --git a/large-v3/decoder_second.mlmodelc/weights/weight.bin b/large-v3/decoder_second.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7c5a4ed8a3f05219c651d7ab76589c894c2dd42d
--- /dev/null
+++ b/large-v3/decoder_second.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94a757620673f9ff3498457f742697cea3fbe6ad2754099f72ef6f0151ca0314
+size 1607637364
diff --git a/large-v3/encoder.mlmodelc/analytics/coremldata.bin b/large-v3/encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..41ca7472382cf724a64670ac17a1afc902f53aa7
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6182464ab69572de2980864c2fd4edc10b4b269f5fb25f0cbf5e22a86d36abc6
+size 202
diff --git a/large-v3/encoder.mlmodelc/coremldata.bin b/large-v3/encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..865e1780dfe74afcc6790112d691462f020d0651
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46dc321dd0ff6005125dc0365c3e0ecb2413f838328888df48578af4d2869749
+size 197
diff --git a/large-v3/encoder.mlmodelc/metadata.json b/large-v3/encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..09266292ff38dc28253acec8329c16abfc1c410a
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/metadata.json
@@ -0,0 +1,76 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 1280]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.add" : 65,
+      "Ios18.reshape" : 128,
+      "Ios18.linear" : 192,
+      "Ios18.gelu" : 34,
+      "Ios18.matmul" : 64,
+      "Ios18.transpose" : 129,
+      "Ios18.layerNorm" : 65,
+      "Ios18.conv" : 2,
+      "Ios18.cast" : 4,
+      "Ios18.softmax" : 32,
+      "Ios18.mul" : 64
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_pipeline",
+      "structure" : [
+        {
+          "name" : "MLModelType_mlProgram"
+        },
+        {
+          "name" : "MLModelType_mlProgram"
+        }
+      ]
+    },
+    "userDefinedMetadata" : {
+
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 128 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 128, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "chunked_pipeline",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/large-v3/encoder.mlmodelc/model0/analytics/coremldata.bin b/large-v3/encoder.mlmodelc/model0/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ed18ae44ab3d09ffbed846536c84109f12b19b1
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/model0/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a8281049b2a65a3be541cfd9f949e84b8fe1c5251ce90e46da1626fed54e58a
+size 108
diff --git a/large-v3/encoder.mlmodelc/model0/coremldata.bin b/large-v3/encoder.mlmodelc/model0/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3b0d1904141cd0a6f720a7b6752d01959f5b3081
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/model0/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2b0461e225831cc34e0017a300f867929784559e2ee471f01ddfd3452381076
+size 201
diff --git a/large-v3/encoder.mlmodelc/model0/model.mil b/large-v3/encoder.mlmodelc/model0/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..af87fe33cb70a8c4f508e264a7682accb9cec40a
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/model0/model.mil
@@ -0,0 +1,962 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 128, 3000]> logmel_data) {
+            string var_84_pad_type_0 = const()[name = string("op_84_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_84_pad_0 = const()[name = string("op_84_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_84_strides_0 = const()[name = string("op_84_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_84_dilations_0 = const()[name = string("op_84_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_84_groups_0 = const()[name = string("op_84_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 128, 3]> weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor<fp16, [1280, 128, 3]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1280]> bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(983168)))];
+            tensor<fp16, [1, 1280, 3000]> var_84_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_84_dilations_0, groups = var_84_groups_0, pad = var_84_pad_0, pad_type = var_84_pad_type_0, strides = var_84_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_84_cast_fp16")];
+            string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1280, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_84_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_102_pad_type_0 = const()[name = string("op_102_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_102_pad_0 = const()[name = string("op_102_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_102_strides_0 = const()[name = string("op_102_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_102_dilations_0 = const()[name = string("op_102_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_102_groups_0 = const()[name = string("op_102_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 3]> weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor<fp16, [1280, 1280, 3]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(985792)))];
+            tensor<fp16, [1280]> bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(10816256)))];
+            tensor<fp16, [1, 1280, 1500]> var_102_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_102_dilations_0, groups = var_102_groups_0, pad = var_102_pad_0, pad_type = var_102_pad_type_0, strides = var_102_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_102_cast_fp16")];
+            string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1280, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_102_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<int32, [3]> var_108 = const()[name = string("op_108"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [1500, 1280]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [1500, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(10818880)))];
+            tensor<fp16, [1, 1500, 1280]> x_5_cast_fp16 = transpose(perm = var_108, x = x_3_cast_fp16)[name = string("transpose_160")];
+            tensor<fp16, [1, 1500, 1280]> var_111_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_111_cast_fp16")];
+            int32 var_124 = const()[name = string("op_124"), val = int32(-1)];
+            tensor<int32, [1]> var_140_axes_0 = const()[name = string("op_140_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14658944)))];
+            tensor<fp16, [1280]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14661568)))];
+            fp16 var_130_to_fp16 = const()[name = string("op_130_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_140_cast_fp16 = layer_norm(axes = var_140_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_130_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_111_cast_fp16)[name = string("op_140_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14664192)))];
+            tensor<fp16, [1280]> var_152_to_fp16 = const()[name = string("op_152_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(17941056)))];
+            tensor<fp16, [1, 1500, 1280]> linear_0_cast_fp16 = linear(bias = var_152_to_fp16, weight = var_151_to_fp16, x = var_140_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_155_to_fp16 = const()[name = string("op_155_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(17943680)))];
+            tensor<fp16, [1280]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(21220544)))];
+            tensor<fp16, [1, 1500, 1280]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_155_to_fp16, x = var_140_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_159_to_fp16 = const()[name = string("op_159_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(21223168)))];
+            tensor<fp16, [1280]> var_160_to_fp16 = const()[name = string("op_160_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(24500032)))];
+            tensor<fp16, [1, 1500, 1280]> linear_2_cast_fp16 = linear(bias = var_160_to_fp16, weight = var_159_to_fp16, x = var_140_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_168 = const()[name = string("op_168"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_169_cast_fp16 = reshape(shape = var_168, x = linear_0_cast_fp16)[name = string("op_169_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_3_cast_fp16 = mul(x = var_169_cast_fp16, y = const_224_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> var_175 = const()[name = string("op_175"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_176_cast_fp16 = reshape(shape = var_175, x = linear_1_cast_fp16)[name = string("op_176_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_3_cast_fp16 = mul(x = var_176_cast_fp16, y = const_225_to_fp16)[name = string("k_3_cast_fp16")];
+            tensor<int32, [4]> var_182 = const()[name = string("op_182"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_183_cast_fp16 = reshape(shape = var_182, x = linear_2_cast_fp16)[name = string("op_183_cast_fp16")];
+            tensor<int32, [4]> var_184 = const()[name = string("op_184"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_65 = transpose(perm = transpose_65_perm_0, x = k_3_cast_fp16)[name = string("transpose_158")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_64 = transpose(perm = transpose_64_perm_0, x = q_3_cast_fp16)[name = string("transpose_159")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_1_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_188_cast_fp16 = softmax(axis = var_124, x = qk_1_cast_fp16)[name = string("op_188_cast_fp16")];
+            bool var_190_transpose_x_0 = const()[name = string("op_190_transpose_x_0"), val = bool(false)];
+            bool var_190_transpose_y_0 = const()[name = string("op_190_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_3_cast_fp16 = transpose(perm = var_184, x = var_183_cast_fp16)[name = string("transpose_157")];
+            tensor<fp16, [1, 20, 1500, 64]> var_190_cast_fp16 = matmul(transpose_x = var_190_transpose_x_0, transpose_y = var_190_transpose_y_0, x = var_188_cast_fp16, y = v_3_cast_fp16)[name = string("op_190_cast_fp16")];
+            tensor<int32, [4]> var_191 = const()[name = string("op_191"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_192_cast_fp16 = transpose(perm = var_191, x = var_190_cast_fp16)[name = string("transpose_156")];
+            tensor<fp16, [1, 1500, 1280]> x_11_cast_fp16 = reshape(shape = concat_0, x = var_192_cast_fp16)[name = string("x_11_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_196_to_fp16 = const()[name = string("op_196_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(24502656)))];
+            tensor<fp16, [1280]> var_197_to_fp16 = const()[name = string("op_197_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27779520)))];
+            tensor<fp16, [1, 1500, 1280]> linear_3_cast_fp16 = linear(bias = var_197_to_fp16, weight = var_196_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_13_cast_fp16 = add(x = var_111_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<int32, [1]> var_204_axes_0 = const()[name = string("op_204_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27782144)))];
+            tensor<fp16, [1280]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27784768)))];
+            tensor<fp16, [1, 1500, 1280]> var_204_cast_fp16 = layer_norm(axes = var_204_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_130_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_204_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27787392)))];
+            tensor<fp16, [5120]> var_214_to_fp16 = const()[name = string("op_214_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(40894656)))];
+            tensor<fp16, [1, 1500, 5120]> linear_4_cast_fp16 = linear(bias = var_214_to_fp16, weight = var_213_to_fp16, x = var_204_cast_fp16)[name = string("linear_4_cast_fp16")];
+            string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_219_to_fp16 = const()[name = string("op_219_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(40904960)))];
+            tensor<fp16, [1280]> var_220_to_fp16 = const()[name = string("op_220_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(54012224)))];
+            tensor<fp16, [1, 1500, 1280]> linear_5_cast_fp16 = linear(bias = var_220_to_fp16, weight = var_219_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")];
+            int32 var_230 = const()[name = string("op_230"), val = int32(-1)];
+            tensor<int32, [1]> var_246_axes_0 = const()[name = string("op_246_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(54014848)))];
+            tensor<fp16, [1280]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(54017472)))];
+            fp16 var_236_to_fp16 = const()[name = string("op_236_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_246_cast_fp16 = layer_norm(axes = var_246_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_236_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_246_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(54020096)))];
+            tensor<fp16, [1280]> var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(57296960)))];
+            tensor<fp16, [1, 1500, 1280]> linear_6_cast_fp16 = linear(bias = var_258_to_fp16, weight = var_257_to_fp16, x = var_246_cast_fp16)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_261_to_fp16 = const()[name = string("op_261_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(57299584)))];
+            tensor<fp16, [1, 1500, 1280]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_261_to_fp16, x = var_246_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(60576448)))];
+            tensor<fp16, [1280]> var_266_to_fp16 = const()[name = string("op_266_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(63853312)))];
+            tensor<fp16, [1, 1500, 1280]> linear_8_cast_fp16 = linear(bias = var_266_to_fp16, weight = var_265_to_fp16, x = var_246_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_274 = const()[name = string("op_274"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_275_cast_fp16 = reshape(shape = var_274, x = linear_6_cast_fp16)[name = string("op_275_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_226_to_fp16 = const()[name = string("const_226_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_7_cast_fp16 = mul(x = var_275_cast_fp16, y = const_226_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_281 = const()[name = string("op_281"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_282_cast_fp16 = reshape(shape = var_281, x = linear_7_cast_fp16)[name = string("op_282_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_7_cast_fp16 = mul(x = var_282_cast_fp16, y = const_227_to_fp16)[name = string("k_7_cast_fp16")];
+            tensor<int32, [4]> var_288 = const()[name = string("op_288"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_289_cast_fp16 = reshape(shape = var_288, x = linear_8_cast_fp16)[name = string("op_289_cast_fp16")];
+            tensor<int32, [4]> var_290 = const()[name = string("op_290"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)];
+            bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_67 = transpose(perm = transpose_67_perm_0, x = k_7_cast_fp16)[name = string("transpose_154")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_66 = transpose(perm = transpose_66_perm_0, x = q_7_cast_fp16)[name = string("transpose_155")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_294_cast_fp16 = softmax(axis = var_230, x = qk_3_cast_fp16)[name = string("op_294_cast_fp16")];
+            bool var_296_transpose_x_0 = const()[name = string("op_296_transpose_x_0"), val = bool(false)];
+            bool var_296_transpose_y_0 = const()[name = string("op_296_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_7_cast_fp16 = transpose(perm = var_290, x = var_289_cast_fp16)[name = string("transpose_153")];
+            tensor<fp16, [1, 20, 1500, 64]> var_296_cast_fp16 = matmul(transpose_x = var_296_transpose_x_0, transpose_y = var_296_transpose_y_0, x = var_294_cast_fp16, y = v_7_cast_fp16)[name = string("op_296_cast_fp16")];
+            tensor<int32, [4]> var_297 = const()[name = string("op_297"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_298_cast_fp16 = transpose(perm = var_297, x = var_296_cast_fp16)[name = string("transpose_152")];
+            tensor<fp16, [1, 1500, 1280]> x_23_cast_fp16 = reshape(shape = concat_1, x = var_298_cast_fp16)[name = string("x_23_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_302_to_fp16 = const()[name = string("op_302_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(63855936)))];
+            tensor<fp16, [1280]> var_303_to_fp16 = const()[name = string("op_303_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(67132800)))];
+            tensor<fp16, [1, 1500, 1280]> linear_9_cast_fp16 = linear(bias = var_303_to_fp16, weight = var_302_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<int32, [1]> var_310_axes_0 = const()[name = string("op_310_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(67135424)))];
+            tensor<fp16, [1280]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(67138048)))];
+            tensor<fp16, [1, 1500, 1280]> var_310_cast_fp16 = layer_norm(axes = var_310_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_236_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_310_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_319_to_fp16 = const()[name = string("op_319_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(67140672)))];
+            tensor<fp16, [5120]> var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(80247936)))];
+            tensor<fp16, [1, 1500, 5120]> linear_10_cast_fp16 = linear(bias = var_320_to_fp16, weight = var_319_to_fp16, x = var_310_cast_fp16)[name = string("linear_10_cast_fp16")];
+            string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_325_to_fp16 = const()[name = string("op_325_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(80258240)))];
+            tensor<fp16, [1280]> var_326_to_fp16 = const()[name = string("op_326_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93365504)))];
+            tensor<fp16, [1, 1500, 1280]> linear_11_cast_fp16 = linear(bias = var_326_to_fp16, weight = var_325_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")];
+            int32 var_336 = const()[name = string("op_336"), val = int32(-1)];
+            tensor<int32, [1]> var_352_axes_0 = const()[name = string("op_352_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93368128)))];
+            tensor<fp16, [1280]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93370752)))];
+            fp16 var_342_to_fp16 = const()[name = string("op_342_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_352_cast_fp16 = layer_norm(axes = var_352_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_342_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_352_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_363_to_fp16 = const()[name = string("op_363_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93373376)))];
+            tensor<fp16, [1280]> var_364_to_fp16 = const()[name = string("op_364_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(96650240)))];
+            tensor<fp16, [1, 1500, 1280]> linear_12_cast_fp16 = linear(bias = var_364_to_fp16, weight = var_363_to_fp16, x = var_352_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(96652864)))];
+            tensor<fp16, [1, 1500, 1280]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_367_to_fp16, x = var_352_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_371_to_fp16 = const()[name = string("op_371_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(99929728)))];
+            tensor<fp16, [1280]> var_372_to_fp16 = const()[name = string("op_372_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(103206592)))];
+            tensor<fp16, [1, 1500, 1280]> linear_14_cast_fp16 = linear(bias = var_372_to_fp16, weight = var_371_to_fp16, x = var_352_cast_fp16)[name = string("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_380 = const()[name = string("op_380"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_381_cast_fp16 = reshape(shape = var_380, x = linear_12_cast_fp16)[name = string("op_381_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_228_to_fp16 = const()[name = string("const_228_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_11_cast_fp16 = mul(x = var_381_cast_fp16, y = const_228_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> var_387 = const()[name = string("op_387"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_388_cast_fp16 = reshape(shape = var_387, x = linear_13_cast_fp16)[name = string("op_388_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_11_cast_fp16 = mul(x = var_388_cast_fp16, y = const_229_to_fp16)[name = string("k_11_cast_fp16")];
+            tensor<int32, [4]> var_394 = const()[name = string("op_394"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_395_cast_fp16 = reshape(shape = var_394, x = linear_14_cast_fp16)[name = string("op_395_cast_fp16")];
+            tensor<int32, [4]> var_396 = const()[name = string("op_396"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_69 = transpose(perm = transpose_69_perm_0, x = k_11_cast_fp16)[name = string("transpose_150")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_68 = transpose(perm = transpose_68_perm_0, x = q_11_cast_fp16)[name = string("transpose_151")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_400_cast_fp16 = softmax(axis = var_336, x = qk_5_cast_fp16)[name = string("op_400_cast_fp16")];
+            bool var_402_transpose_x_0 = const()[name = string("op_402_transpose_x_0"), val = bool(false)];
+            bool var_402_transpose_y_0 = const()[name = string("op_402_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_11_cast_fp16 = transpose(perm = var_396, x = var_395_cast_fp16)[name = string("transpose_149")];
+            tensor<fp16, [1, 20, 1500, 64]> var_402_cast_fp16 = matmul(transpose_x = var_402_transpose_x_0, transpose_y = var_402_transpose_y_0, x = var_400_cast_fp16, y = v_11_cast_fp16)[name = string("op_402_cast_fp16")];
+            tensor<int32, [4]> var_403 = const()[name = string("op_403"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_404_cast_fp16 = transpose(perm = var_403, x = var_402_cast_fp16)[name = string("transpose_148")];
+            tensor<fp16, [1, 1500, 1280]> x_35_cast_fp16 = reshape(shape = concat_2, x = var_404_cast_fp16)[name = string("x_35_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(103209216)))];
+            tensor<fp16, [1280]> var_409_to_fp16 = const()[name = string("op_409_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106486080)))];
+            tensor<fp16, [1, 1500, 1280]> linear_15_cast_fp16 = linear(bias = var_409_to_fp16, weight = var_408_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<int32, [1]> var_416_axes_0 = const()[name = string("op_416_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106488704)))];
+            tensor<fp16, [1280]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106491328)))];
+            tensor<fp16, [1, 1500, 1280]> var_416_cast_fp16 = layer_norm(axes = var_416_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_342_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_416_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_425_to_fp16 = const()[name = string("op_425_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106493952)))];
+            tensor<fp16, [5120]> var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(119601216)))];
+            tensor<fp16, [1, 1500, 5120]> linear_16_cast_fp16 = linear(bias = var_426_to_fp16, weight = var_425_to_fp16, x = var_416_cast_fp16)[name = string("linear_16_cast_fp16")];
+            string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_431_to_fp16 = const()[name = string("op_431_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(119611520)))];
+            tensor<fp16, [1280]> var_432_to_fp16 = const()[name = string("op_432_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132718784)))];
+            tensor<fp16, [1, 1500, 1280]> linear_17_cast_fp16 = linear(bias = var_432_to_fp16, weight = var_431_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")];
+            int32 var_442 = const()[name = string("op_442"), val = int32(-1)];
+            tensor<int32, [1]> var_458_axes_0 = const()[name = string("op_458_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132721408)))];
+            tensor<fp16, [1280]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132724032)))];
+            fp16 var_448_to_fp16 = const()[name = string("op_448_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_458_cast_fp16 = layer_norm(axes = var_458_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_448_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_458_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132726656)))];
+            tensor<fp16, [1280]> var_470_to_fp16 = const()[name = string("op_470_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(136003520)))];
+            tensor<fp16, [1, 1500, 1280]> linear_18_cast_fp16 = linear(bias = var_470_to_fp16, weight = var_469_to_fp16, x = var_458_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_473_to_fp16 = const()[name = string("op_473_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(136006144)))];
+            tensor<fp16, [1, 1500, 1280]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_473_to_fp16, x = var_458_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_477_to_fp16 = const()[name = string("op_477_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(139283008)))];
+            tensor<fp16, [1280]> var_478_to_fp16 = const()[name = string("op_478_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(142559872)))];
+            tensor<fp16, [1, 1500, 1280]> linear_20_cast_fp16 = linear(bias = var_478_to_fp16, weight = var_477_to_fp16, x = var_458_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_486 = const()[name = string("op_486"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_487_cast_fp16 = reshape(shape = var_486, x = linear_18_cast_fp16)[name = string("op_487_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_15_cast_fp16 = mul(x = var_487_cast_fp16, y = const_230_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_493 = const()[name = string("op_493"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_494_cast_fp16 = reshape(shape = var_493, x = linear_19_cast_fp16)[name = string("op_494_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_15_cast_fp16 = mul(x = var_494_cast_fp16, y = const_231_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> var_500 = const()[name = string("op_500"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_501_cast_fp16 = reshape(shape = var_500, x = linear_20_cast_fp16)[name = string("op_501_cast_fp16")];
+            tensor<int32, [4]> var_502 = const()[name = string("op_502"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_71 = transpose(perm = transpose_71_perm_0, x = k_15_cast_fp16)[name = string("transpose_146")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_70 = transpose(perm = transpose_70_perm_0, x = q_15_cast_fp16)[name = string("transpose_147")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_7_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_506_cast_fp16 = softmax(axis = var_442, x = qk_7_cast_fp16)[name = string("op_506_cast_fp16")];
+            bool var_508_transpose_x_0 = const()[name = string("op_508_transpose_x_0"), val = bool(false)];
+            bool var_508_transpose_y_0 = const()[name = string("op_508_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_15_cast_fp16 = transpose(perm = var_502, x = var_501_cast_fp16)[name = string("transpose_145")];
+            tensor<fp16, [1, 20, 1500, 64]> var_508_cast_fp16 = matmul(transpose_x = var_508_transpose_x_0, transpose_y = var_508_transpose_y_0, x = var_506_cast_fp16, y = v_15_cast_fp16)[name = string("op_508_cast_fp16")];
+            tensor<int32, [4]> var_509 = const()[name = string("op_509"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_510_cast_fp16 = transpose(perm = var_509, x = var_508_cast_fp16)[name = string("transpose_144")];
+            tensor<fp16, [1, 1500, 1280]> x_47_cast_fp16 = reshape(shape = concat_3, x = var_510_cast_fp16)[name = string("x_47_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_514_to_fp16 = const()[name = string("op_514_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(142562496)))];
+            tensor<fp16, [1280]> var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145839360)))];
+            tensor<fp16, [1, 1500, 1280]> linear_21_cast_fp16 = linear(bias = var_515_to_fp16, weight = var_514_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<int32, [1]> var_522_axes_0 = const()[name = string("op_522_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145841984)))];
+            tensor<fp16, [1280]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145844608)))];
+            tensor<fp16, [1, 1500, 1280]> var_522_cast_fp16 = layer_norm(axes = var_522_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_448_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_522_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145847232)))];
+            tensor<fp16, [5120]> var_532_to_fp16 = const()[name = string("op_532_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(158954496)))];
+            tensor<fp16, [1, 1500, 5120]> linear_22_cast_fp16 = linear(bias = var_532_to_fp16, weight = var_531_to_fp16, x = var_522_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_537_to_fp16 = const()[name = string("op_537_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(158964800)))];
+            tensor<fp16, [1280]> var_538_to_fp16 = const()[name = string("op_538_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(172072064)))];
+            tensor<fp16, [1, 1500, 1280]> linear_23_cast_fp16 = linear(bias = var_538_to_fp16, weight = var_537_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")];
+            int32 var_548 = const()[name = string("op_548"), val = int32(-1)];
+            tensor<int32, [1]> var_564_axes_0 = const()[name = string("op_564_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(172074688)))];
+            tensor<fp16, [1280]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(172077312)))];
+            fp16 var_554_to_fp16 = const()[name = string("op_554_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_564_cast_fp16 = layer_norm(axes = var_564_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_554_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_564_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_575_to_fp16 = const()[name = string("op_575_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(172079936)))];
+            tensor<fp16, [1280]> var_576_to_fp16 = const()[name = string("op_576_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(175356800)))];
+            tensor<fp16, [1, 1500, 1280]> linear_24_cast_fp16 = linear(bias = var_576_to_fp16, weight = var_575_to_fp16, x = var_564_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(175359424)))];
+            tensor<fp16, [1, 1500, 1280]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_579_to_fp16, x = var_564_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_583_to_fp16 = const()[name = string("op_583_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(178636288)))];
+            tensor<fp16, [1280]> var_584_to_fp16 = const()[name = string("op_584_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(181913152)))];
+            tensor<fp16, [1, 1500, 1280]> linear_26_cast_fp16 = linear(bias = var_584_to_fp16, weight = var_583_to_fp16, x = var_564_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [4]> var_592 = const()[name = string("op_592"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_593_cast_fp16 = reshape(shape = var_592, x = linear_24_cast_fp16)[name = string("op_593_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_19_cast_fp16 = mul(x = var_593_cast_fp16, y = const_232_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> var_599 = const()[name = string("op_599"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_600_cast_fp16 = reshape(shape = var_599, x = linear_25_cast_fp16)[name = string("op_600_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_19_cast_fp16 = mul(x = var_600_cast_fp16, y = const_233_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_606 = const()[name = string("op_606"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_607_cast_fp16 = reshape(shape = var_606, x = linear_26_cast_fp16)[name = string("op_607_cast_fp16")];
+            tensor<int32, [4]> var_608 = const()[name = string("op_608"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)];
+            bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_73 = transpose(perm = transpose_73_perm_0, x = k_19_cast_fp16)[name = string("transpose_142")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = q_19_cast_fp16)[name = string("transpose_143")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_72, y = transpose_73)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_612_cast_fp16 = softmax(axis = var_548, x = qk_9_cast_fp16)[name = string("op_612_cast_fp16")];
+            bool var_614_transpose_x_0 = const()[name = string("op_614_transpose_x_0"), val = bool(false)];
+            bool var_614_transpose_y_0 = const()[name = string("op_614_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_608, x = var_607_cast_fp16)[name = string("transpose_141")];
+            tensor<fp16, [1, 20, 1500, 64]> var_614_cast_fp16 = matmul(transpose_x = var_614_transpose_x_0, transpose_y = var_614_transpose_y_0, x = var_612_cast_fp16, y = v_19_cast_fp16)[name = string("op_614_cast_fp16")];
+            tensor<int32, [4]> var_615 = const()[name = string("op_615"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_4 = const()[name = string("concat_4"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_616_cast_fp16 = transpose(perm = var_615, x = var_614_cast_fp16)[name = string("transpose_140")];
+            tensor<fp16, [1, 1500, 1280]> x_59_cast_fp16 = reshape(shape = concat_4, x = var_616_cast_fp16)[name = string("x_59_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_620_to_fp16 = const()[name = string("op_620_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(181915776)))];
+            tensor<fp16, [1280]> var_621_to_fp16 = const()[name = string("op_621_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(185192640)))];
+            tensor<fp16, [1, 1500, 1280]> linear_27_cast_fp16 = linear(bias = var_621_to_fp16, weight = var_620_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<int32, [1]> var_628_axes_0 = const()[name = string("op_628_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(185195264)))];
+            tensor<fp16, [1280]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(185197888)))];
+            tensor<fp16, [1, 1500, 1280]> var_628_cast_fp16 = layer_norm(axes = var_628_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_554_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_628_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_637_to_fp16 = const()[name = string("op_637_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(185200512)))];
+            tensor<fp16, [5120]> var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(198307776)))];
+            tensor<fp16, [1, 1500, 5120]> linear_28_cast_fp16 = linear(bias = var_638_to_fp16, weight = var_637_to_fp16, x = var_628_cast_fp16)[name = string("linear_28_cast_fp16")];
+            string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_643_to_fp16 = const()[name = string("op_643_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(198318080)))];
+            tensor<fp16, [1280]> var_644_to_fp16 = const()[name = string("op_644_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211425344)))];
+            tensor<fp16, [1, 1500, 1280]> linear_29_cast_fp16 = linear(bias = var_644_to_fp16, weight = var_643_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")];
+            int32 var_654 = const()[name = string("op_654"), val = int32(-1)];
+            tensor<int32, [1]> var_670_axes_0 = const()[name = string("op_670_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211427968)))];
+            tensor<fp16, [1280]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211430592)))];
+            fp16 var_660_to_fp16 = const()[name = string("op_660_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_670_cast_fp16 = layer_norm(axes = var_670_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_660_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_670_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_681_to_fp16 = const()[name = string("op_681_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211433216)))];
+            tensor<fp16, [1280]> var_682_to_fp16 = const()[name = string("op_682_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(214710080)))];
+            tensor<fp16, [1, 1500, 1280]> linear_30_cast_fp16 = linear(bias = var_682_to_fp16, weight = var_681_to_fp16, x = var_670_cast_fp16)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_685_to_fp16 = const()[name = string("op_685_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(214712704)))];
+            tensor<fp16, [1, 1500, 1280]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_685_to_fp16, x = var_670_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_689_to_fp16 = const()[name = string("op_689_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(217989568)))];
+            tensor<fp16, [1280]> var_690_to_fp16 = const()[name = string("op_690_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(221266432)))];
+            tensor<fp16, [1, 1500, 1280]> linear_32_cast_fp16 = linear(bias = var_690_to_fp16, weight = var_689_to_fp16, x = var_670_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<int32, [4]> var_698 = const()[name = string("op_698"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_699_cast_fp16 = reshape(shape = var_698, x = linear_30_cast_fp16)[name = string("op_699_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_23_cast_fp16 = mul(x = var_699_cast_fp16, y = const_234_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_705 = const()[name = string("op_705"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_706_cast_fp16 = reshape(shape = var_705, x = linear_31_cast_fp16)[name = string("op_706_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_23_cast_fp16 = mul(x = var_706_cast_fp16, y = const_235_to_fp16)[name = string("k_23_cast_fp16")];
+            tensor<int32, [4]> var_712 = const()[name = string("op_712"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_713_cast_fp16 = reshape(shape = var_712, x = linear_32_cast_fp16)[name = string("op_713_cast_fp16")];
+            tensor<int32, [4]> var_714 = const()[name = string("op_714"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_75 = transpose(perm = transpose_75_perm_0, x = k_23_cast_fp16)[name = string("transpose_138")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = q_23_cast_fp16)[name = string("transpose_139")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_74, y = transpose_75)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_718_cast_fp16 = softmax(axis = var_654, x = qk_11_cast_fp16)[name = string("op_718_cast_fp16")];
+            bool var_720_transpose_x_0 = const()[name = string("op_720_transpose_x_0"), val = bool(false)];
+            bool var_720_transpose_y_0 = const()[name = string("op_720_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_23_cast_fp16 = transpose(perm = var_714, x = var_713_cast_fp16)[name = string("transpose_137")];
+            tensor<fp16, [1, 20, 1500, 64]> var_720_cast_fp16 = matmul(transpose_x = var_720_transpose_x_0, transpose_y = var_720_transpose_y_0, x = var_718_cast_fp16, y = v_23_cast_fp16)[name = string("op_720_cast_fp16")];
+            tensor<int32, [4]> var_721 = const()[name = string("op_721"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_722_cast_fp16 = transpose(perm = var_721, x = var_720_cast_fp16)[name = string("transpose_136")];
+            tensor<fp16, [1, 1500, 1280]> x_71_cast_fp16 = reshape(shape = concat_5, x = var_722_cast_fp16)[name = string("x_71_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_726_to_fp16 = const()[name = string("op_726_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(221269056)))];
+            tensor<fp16, [1280]> var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224545920)))];
+            tensor<fp16, [1, 1500, 1280]> linear_33_cast_fp16 = linear(bias = var_727_to_fp16, weight = var_726_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<int32, [1]> var_734_axes_0 = const()[name = string("op_734_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224548544)))];
+            tensor<fp16, [1280]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224551168)))];
+            tensor<fp16, [1, 1500, 1280]> var_734_cast_fp16 = layer_norm(axes = var_734_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_660_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_734_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224553792)))];
+            tensor<fp16, [5120]> var_744_to_fp16 = const()[name = string("op_744_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(237661056)))];
+            tensor<fp16, [1, 1500, 5120]> linear_34_cast_fp16 = linear(bias = var_744_to_fp16, weight = var_743_to_fp16, x = var_734_cast_fp16)[name = string("linear_34_cast_fp16")];
+            string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_749_to_fp16 = const()[name = string("op_749_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(237671360)))];
+            tensor<fp16, [1280]> var_750_to_fp16 = const()[name = string("op_750_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250778624)))];
+            tensor<fp16, [1, 1500, 1280]> linear_35_cast_fp16 = linear(bias = var_750_to_fp16, weight = var_749_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_79_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_79_cast_fp16")];
+            int32 var_760 = const()[name = string("op_760"), val = int32(-1)];
+            tensor<int32, [1]> var_776_axes_0 = const()[name = string("op_776_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250781248)))];
+            tensor<fp16, [1280]> blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250783872)))];
+            fp16 var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_776_cast_fp16 = layer_norm(axes = var_776_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_766_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast_fp16)[name = string("op_776_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_787_to_fp16 = const()[name = string("op_787_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250786496)))];
+            tensor<fp16, [1280]> var_788_to_fp16 = const()[name = string("op_788_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(254063360)))];
+            tensor<fp16, [1, 1500, 1280]> linear_36_cast_fp16 = linear(bias = var_788_to_fp16, weight = var_787_to_fp16, x = var_776_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_791_to_fp16 = const()[name = string("op_791_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(254065984)))];
+            tensor<fp16, [1, 1500, 1280]> linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_791_to_fp16, x = var_776_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(257342848)))];
+            tensor<fp16, [1280]> var_796_to_fp16 = const()[name = string("op_796_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(260619712)))];
+            tensor<fp16, [1, 1500, 1280]> linear_38_cast_fp16 = linear(bias = var_796_to_fp16, weight = var_795_to_fp16, x = var_776_cast_fp16)[name = string("linear_38_cast_fp16")];
+            tensor<int32, [4]> var_804 = const()[name = string("op_804"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_805_cast_fp16 = reshape(shape = var_804, x = linear_36_cast_fp16)[name = string("op_805_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_236_to_fp16 = const()[name = string("const_236_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_27_cast_fp16 = mul(x = var_805_cast_fp16, y = const_236_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> var_811 = const()[name = string("op_811"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_812_cast_fp16 = reshape(shape = var_811, x = linear_37_cast_fp16)[name = string("op_812_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_27_cast_fp16 = mul(x = var_812_cast_fp16, y = const_237_to_fp16)[name = string("k_27_cast_fp16")];
+            tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_819_cast_fp16 = reshape(shape = var_818, x = linear_38_cast_fp16)[name = string("op_819_cast_fp16")];
+            tensor<int32, [4]> var_820 = const()[name = string("op_820"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_77 = transpose(perm = transpose_77_perm_0, x = k_27_cast_fp16)[name = string("transpose_134")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = q_27_cast_fp16)[name = string("transpose_135")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_76, y = transpose_77)[name = string("qk_13_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_824_cast_fp16 = softmax(axis = var_760, x = qk_13_cast_fp16)[name = string("op_824_cast_fp16")];
+            bool var_826_transpose_x_0 = const()[name = string("op_826_transpose_x_0"), val = bool(false)];
+            bool var_826_transpose_y_0 = const()[name = string("op_826_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_27_cast_fp16 = transpose(perm = var_820, x = var_819_cast_fp16)[name = string("transpose_133")];
+            tensor<fp16, [1, 20, 1500, 64]> var_826_cast_fp16 = matmul(transpose_x = var_826_transpose_x_0, transpose_y = var_826_transpose_y_0, x = var_824_cast_fp16, y = v_27_cast_fp16)[name = string("op_826_cast_fp16")];
+            tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_6 = const()[name = string("concat_6"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_828_cast_fp16 = transpose(perm = var_827, x = var_826_cast_fp16)[name = string("transpose_132")];
+            tensor<fp16, [1, 1500, 1280]> x_83_cast_fp16 = reshape(shape = concat_6, x = var_828_cast_fp16)[name = string("x_83_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_832_to_fp16 = const()[name = string("op_832_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(260622336)))];
+            tensor<fp16, [1280]> var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263899200)))];
+            tensor<fp16, [1, 1500, 1280]> linear_39_cast_fp16 = linear(bias = var_833_to_fp16, weight = var_832_to_fp16, x = x_83_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_85_cast_fp16 = add(x = x_79_cast_fp16, y = linear_39_cast_fp16)[name = string("x_85_cast_fp16")];
+            tensor<int32, [1]> var_840_axes_0 = const()[name = string("op_840_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263901824)))];
+            tensor<fp16, [1280]> blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263904448)))];
+            tensor<fp16, [1, 1500, 1280]> var_840_cast_fp16 = layer_norm(axes = var_840_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_766_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast_fp16)[name = string("op_840_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263907072)))];
+            tensor<fp16, [5120]> var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(277014336)))];
+            tensor<fp16, [1, 1500, 5120]> linear_40_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = var_840_cast_fp16)[name = string("linear_40_cast_fp16")];
+            string x_89_mode_0 = const()[name = string("x_89_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_89_cast_fp16 = gelu(mode = x_89_mode_0, x = linear_40_cast_fp16)[name = string("x_89_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_855_to_fp16 = const()[name = string("op_855_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(277024640)))];
+            tensor<fp16, [1280]> var_856_to_fp16 = const()[name = string("op_856_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(290131904)))];
+            tensor<fp16, [1, 1500, 1280]> linear_41_cast_fp16 = linear(bias = var_856_to_fp16, weight = var_855_to_fp16, x = x_89_cast_fp16)[name = string("linear_41_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_91_cast_fp16 = add(x = x_85_cast_fp16, y = linear_41_cast_fp16)[name = string("x_91_cast_fp16")];
+            int32 var_866 = const()[name = string("op_866"), val = int32(-1)];
+            tensor<int32, [1]> var_882_axes_0 = const()[name = string("op_882_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(290134528)))];
+            tensor<fp16, [1280]> blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(290137152)))];
+            fp16 var_872_to_fp16 = const()[name = string("op_872_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_882_cast_fp16 = layer_norm(axes = var_882_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_872_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast_fp16)[name = string("op_882_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_893_to_fp16 = const()[name = string("op_893_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(290139776)))];
+            tensor<fp16, [1280]> var_894_to_fp16 = const()[name = string("op_894_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(293416640)))];
+            tensor<fp16, [1, 1500, 1280]> linear_42_cast_fp16 = linear(bias = var_894_to_fp16, weight = var_893_to_fp16, x = var_882_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(293419264)))];
+            tensor<fp16, [1, 1500, 1280]> linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_897_to_fp16, x = var_882_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(296696128)))];
+            tensor<fp16, [1280]> var_902_to_fp16 = const()[name = string("op_902_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(299972992)))];
+            tensor<fp16, [1, 1500, 1280]> linear_44_cast_fp16 = linear(bias = var_902_to_fp16, weight = var_901_to_fp16, x = var_882_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [4]> var_910 = const()[name = string("op_910"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_911_cast_fp16 = reshape(shape = var_910, x = linear_42_cast_fp16)[name = string("op_911_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_238_to_fp16 = const()[name = string("const_238_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_31_cast_fp16 = mul(x = var_911_cast_fp16, y = const_238_to_fp16)[name = string("q_31_cast_fp16")];
+            tensor<int32, [4]> var_917 = const()[name = string("op_917"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_918_cast_fp16 = reshape(shape = var_917, x = linear_43_cast_fp16)[name = string("op_918_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_239_to_fp16 = const()[name = string("const_239_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_31_cast_fp16 = mul(x = var_918_cast_fp16, y = const_239_to_fp16)[name = string("k_31_cast_fp16")];
+            tensor<int32, [4]> var_924 = const()[name = string("op_924"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_925_cast_fp16 = reshape(shape = var_924, x = linear_44_cast_fp16)[name = string("op_925_cast_fp16")];
+            tensor<int32, [4]> var_926 = const()[name = string("op_926"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_15_transpose_x_0 = const()[name = string("qk_15_transpose_x_0"), val = bool(false)];
+            bool qk_15_transpose_y_0 = const()[name = string("qk_15_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_79 = transpose(perm = transpose_79_perm_0, x = k_31_cast_fp16)[name = string("transpose_130")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = q_31_cast_fp16)[name = string("transpose_131")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_15_cast_fp16 = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_78, y = transpose_79)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_930_cast_fp16 = softmax(axis = var_866, x = qk_15_cast_fp16)[name = string("op_930_cast_fp16")];
+            bool var_932_transpose_x_0 = const()[name = string("op_932_transpose_x_0"), val = bool(false)];
+            bool var_932_transpose_y_0 = const()[name = string("op_932_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_31_cast_fp16 = transpose(perm = var_926, x = var_925_cast_fp16)[name = string("transpose_129")];
+            tensor<fp16, [1, 20, 1500, 64]> var_932_cast_fp16 = matmul(transpose_x = var_932_transpose_x_0, transpose_y = var_932_transpose_y_0, x = var_930_cast_fp16, y = v_31_cast_fp16)[name = string("op_932_cast_fp16")];
+            tensor<int32, [4]> var_933 = const()[name = string("op_933"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_7 = const()[name = string("concat_7"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_934_cast_fp16 = transpose(perm = var_933, x = var_932_cast_fp16)[name = string("transpose_128")];
+            tensor<fp16, [1, 1500, 1280]> x_95_cast_fp16 = reshape(shape = concat_7, x = var_934_cast_fp16)[name = string("x_95_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_938_to_fp16 = const()[name = string("op_938_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(299975616)))];
+            tensor<fp16, [1280]> var_939_to_fp16 = const()[name = string("op_939_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(303252480)))];
+            tensor<fp16, [1, 1500, 1280]> linear_45_cast_fp16 = linear(bias = var_939_to_fp16, weight = var_938_to_fp16, x = x_95_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_97_cast_fp16 = add(x = x_91_cast_fp16, y = linear_45_cast_fp16)[name = string("x_97_cast_fp16")];
+            tensor<int32, [1]> var_946_axes_0 = const()[name = string("op_946_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(303255104)))];
+            tensor<fp16, [1280]> blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(303257728)))];
+            tensor<fp16, [1, 1500, 1280]> var_946_cast_fp16 = layer_norm(axes = var_946_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_872_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast_fp16)[name = string("op_946_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_955_to_fp16 = const()[name = string("op_955_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(303260352)))];
+            tensor<fp16, [5120]> var_956_to_fp16 = const()[name = string("op_956_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(316367616)))];
+            tensor<fp16, [1, 1500, 5120]> linear_46_cast_fp16 = linear(bias = var_956_to_fp16, weight = var_955_to_fp16, x = var_946_cast_fp16)[name = string("linear_46_cast_fp16")];
+            string x_101_mode_0 = const()[name = string("x_101_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_101_cast_fp16 = gelu(mode = x_101_mode_0, x = linear_46_cast_fp16)[name = string("x_101_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_961_to_fp16 = const()[name = string("op_961_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(316377920)))];
+            tensor<fp16, [1280]> var_962_to_fp16 = const()[name = string("op_962_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329485184)))];
+            tensor<fp16, [1, 1500, 1280]> linear_47_cast_fp16 = linear(bias = var_962_to_fp16, weight = var_961_to_fp16, x = x_101_cast_fp16)[name = string("linear_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_47_cast_fp16)[name = string("x_103_cast_fp16")];
+            int32 var_972 = const()[name = string("op_972"), val = int32(-1)];
+            tensor<int32, [1]> var_988_axes_0 = const()[name = string("op_988_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329487808)))];
+            tensor<fp16, [1280]> blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329490432)))];
+            fp16 var_978_to_fp16 = const()[name = string("op_978_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_988_cast_fp16 = layer_norm(axes = var_988_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_978_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast_fp16)[name = string("op_988_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_999_to_fp16 = const()[name = string("op_999_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329493056)))];
+            tensor<fp16, [1280]> var_1000_to_fp16 = const()[name = string("op_1000_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(332769920)))];
+            tensor<fp16, [1, 1500, 1280]> linear_48_cast_fp16 = linear(bias = var_1000_to_fp16, weight = var_999_to_fp16, x = var_988_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(332772544)))];
+            tensor<fp16, [1, 1500, 1280]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1003_to_fp16, x = var_988_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1007_to_fp16 = const()[name = string("op_1007_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(336049408)))];
+            tensor<fp16, [1280]> var_1008_to_fp16 = const()[name = string("op_1008_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(339326272)))];
+            tensor<fp16, [1, 1500, 1280]> linear_50_cast_fp16 = linear(bias = var_1008_to_fp16, weight = var_1007_to_fp16, x = var_988_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [4]> var_1016 = const()[name = string("op_1016"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1017_cast_fp16 = reshape(shape = var_1016, x = linear_48_cast_fp16)[name = string("op_1017_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_35_cast_fp16 = mul(x = var_1017_cast_fp16, y = const_240_to_fp16)[name = string("q_35_cast_fp16")];
+            tensor<int32, [4]> var_1023 = const()[name = string("op_1023"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1024_cast_fp16 = reshape(shape = var_1023, x = linear_49_cast_fp16)[name = string("op_1024_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_35_cast_fp16 = mul(x = var_1024_cast_fp16, y = const_241_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> var_1030 = const()[name = string("op_1030"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1031_cast_fp16 = reshape(shape = var_1030, x = linear_50_cast_fp16)[name = string("op_1031_cast_fp16")];
+            tensor<int32, [4]> var_1032 = const()[name = string("op_1032"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_81 = transpose(perm = transpose_81_perm_0, x = k_35_cast_fp16)[name = string("transpose_126")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = q_35_cast_fp16)[name = string("transpose_127")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_80, y = transpose_81)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1036_cast_fp16 = softmax(axis = var_972, x = qk_17_cast_fp16)[name = string("op_1036_cast_fp16")];
+            bool var_1038_transpose_x_0 = const()[name = string("op_1038_transpose_x_0"), val = bool(false)];
+            bool var_1038_transpose_y_0 = const()[name = string("op_1038_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_35_cast_fp16 = transpose(perm = var_1032, x = var_1031_cast_fp16)[name = string("transpose_125")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1038_cast_fp16 = matmul(transpose_x = var_1038_transpose_x_0, transpose_y = var_1038_transpose_y_0, x = var_1036_cast_fp16, y = v_35_cast_fp16)[name = string("op_1038_cast_fp16")];
+            tensor<int32, [4]> var_1039 = const()[name = string("op_1039"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1040_cast_fp16 = transpose(perm = var_1039, x = var_1038_cast_fp16)[name = string("transpose_124")];
+            tensor<fp16, [1, 1500, 1280]> x_107_cast_fp16 = reshape(shape = concat_8, x = var_1040_cast_fp16)[name = string("x_107_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(339328896)))];
+            tensor<fp16, [1280]> var_1045_to_fp16 = const()[name = string("op_1045_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342605760)))];
+            tensor<fp16, [1, 1500, 1280]> linear_51_cast_fp16 = linear(bias = var_1045_to_fp16, weight = var_1044_to_fp16, x = x_107_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_109_cast_fp16 = add(x = x_103_cast_fp16, y = linear_51_cast_fp16)[name = string("x_109_cast_fp16")];
+            tensor<int32, [1]> var_1052_axes_0 = const()[name = string("op_1052_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342608384)))];
+            tensor<fp16, [1280]> blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342611008)))];
+            tensor<fp16, [1, 1500, 1280]> var_1052_cast_fp16 = layer_norm(axes = var_1052_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_978_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast_fp16)[name = string("op_1052_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342613632)))];
+            tensor<fp16, [5120]> var_1062_to_fp16 = const()[name = string("op_1062_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(355720896)))];
+            tensor<fp16, [1, 1500, 5120]> linear_52_cast_fp16 = linear(bias = var_1062_to_fp16, weight = var_1061_to_fp16, x = var_1052_cast_fp16)[name = string("linear_52_cast_fp16")];
+            string x_113_mode_0 = const()[name = string("x_113_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_113_cast_fp16 = gelu(mode = x_113_mode_0, x = linear_52_cast_fp16)[name = string("x_113_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1067_to_fp16 = const()[name = string("op_1067_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(355731200)))];
+            tensor<fp16, [1280]> var_1068_to_fp16 = const()[name = string("op_1068_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368838464)))];
+            tensor<fp16, [1, 1500, 1280]> linear_53_cast_fp16 = linear(bias = var_1068_to_fp16, weight = var_1067_to_fp16, x = x_113_cast_fp16)[name = string("linear_53_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_115_cast_fp16 = add(x = x_109_cast_fp16, y = linear_53_cast_fp16)[name = string("x_115_cast_fp16")];
+            int32 var_1078 = const()[name = string("op_1078"), val = int32(-1)];
+            tensor<int32, [1]> var_1094_axes_0 = const()[name = string("op_1094_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368841088)))];
+            tensor<fp16, [1280]> blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368843712)))];
+            fp16 var_1084_to_fp16 = const()[name = string("op_1084_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1094_cast_fp16 = layer_norm(axes = var_1094_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1084_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast_fp16)[name = string("op_1094_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1105_to_fp16 = const()[name = string("op_1105_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368846336)))];
+            tensor<fp16, [1280]> var_1106_to_fp16 = const()[name = string("op_1106_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(372123200)))];
+            tensor<fp16, [1, 1500, 1280]> linear_54_cast_fp16 = linear(bias = var_1106_to_fp16, weight = var_1105_to_fp16, x = var_1094_cast_fp16)[name = string("linear_54_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(372125824)))];
+            tensor<fp16, [1, 1500, 1280]> linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1109_to_fp16, x = var_1094_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1113_to_fp16 = const()[name = string("op_1113_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(375402688)))];
+            tensor<fp16, [1280]> var_1114_to_fp16 = const()[name = string("op_1114_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(378679552)))];
+            tensor<fp16, [1, 1500, 1280]> linear_56_cast_fp16 = linear(bias = var_1114_to_fp16, weight = var_1113_to_fp16, x = var_1094_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<int32, [4]> var_1122 = const()[name = string("op_1122"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1123_cast_fp16 = reshape(shape = var_1122, x = linear_54_cast_fp16)[name = string("op_1123_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_39_cast_fp16 = mul(x = var_1123_cast_fp16, y = const_242_to_fp16)[name = string("q_39_cast_fp16")];
+            tensor<int32, [4]> var_1129 = const()[name = string("op_1129"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1130_cast_fp16 = reshape(shape = var_1129, x = linear_55_cast_fp16)[name = string("op_1130_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_39_cast_fp16 = mul(x = var_1130_cast_fp16, y = const_243_to_fp16)[name = string("k_39_cast_fp16")];
+            tensor<int32, [4]> var_1136 = const()[name = string("op_1136"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1137_cast_fp16 = reshape(shape = var_1136, x = linear_56_cast_fp16)[name = string("op_1137_cast_fp16")];
+            tensor<int32, [4]> var_1138 = const()[name = string("op_1138"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_83 = transpose(perm = transpose_83_perm_0, x = k_39_cast_fp16)[name = string("transpose_122")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = q_39_cast_fp16)[name = string("transpose_123")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_82, y = transpose_83)[name = string("qk_19_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1142_cast_fp16 = softmax(axis = var_1078, x = qk_19_cast_fp16)[name = string("op_1142_cast_fp16")];
+            bool var_1144_transpose_x_0 = const()[name = string("op_1144_transpose_x_0"), val = bool(false)];
+            bool var_1144_transpose_y_0 = const()[name = string("op_1144_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_1138, x = var_1137_cast_fp16)[name = string("transpose_121")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1144_cast_fp16 = matmul(transpose_x = var_1144_transpose_x_0, transpose_y = var_1144_transpose_y_0, x = var_1142_cast_fp16, y = v_39_cast_fp16)[name = string("op_1144_cast_fp16")];
+            tensor<int32, [4]> var_1145 = const()[name = string("op_1145"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_9 = const()[name = string("concat_9"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1146_cast_fp16 = transpose(perm = var_1145, x = var_1144_cast_fp16)[name = string("transpose_120")];
+            tensor<fp16, [1, 1500, 1280]> x_119_cast_fp16 = reshape(shape = concat_9, x = var_1146_cast_fp16)[name = string("x_119_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1150_to_fp16 = const()[name = string("op_1150_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(378682176)))];
+            tensor<fp16, [1280]> var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381959040)))];
+            tensor<fp16, [1, 1500, 1280]> linear_57_cast_fp16 = linear(bias = var_1151_to_fp16, weight = var_1150_to_fp16, x = x_119_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_121_cast_fp16 = add(x = x_115_cast_fp16, y = linear_57_cast_fp16)[name = string("x_121_cast_fp16")];
+            tensor<int32, [1]> var_1158_axes_0 = const()[name = string("op_1158_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381961664)))];
+            tensor<fp16, [1280]> blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381964288)))];
+            tensor<fp16, [1, 1500, 1280]> var_1158_cast_fp16 = layer_norm(axes = var_1158_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1084_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast_fp16)[name = string("op_1158_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1167_to_fp16 = const()[name = string("op_1167_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381966912)))];
+            tensor<fp16, [5120]> var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(395074176)))];
+            tensor<fp16, [1, 1500, 5120]> linear_58_cast_fp16 = linear(bias = var_1168_to_fp16, weight = var_1167_to_fp16, x = var_1158_cast_fp16)[name = string("linear_58_cast_fp16")];
+            string x_125_mode_0 = const()[name = string("x_125_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = linear_58_cast_fp16)[name = string("x_125_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(395084480)))];
+            tensor<fp16, [1280]> var_1174_to_fp16 = const()[name = string("op_1174_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(408191744)))];
+            tensor<fp16, [1, 1500, 1280]> linear_59_cast_fp16 = linear(bias = var_1174_to_fp16, weight = var_1173_to_fp16, x = x_125_cast_fp16)[name = string("linear_59_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_127_cast_fp16 = add(x = x_121_cast_fp16, y = linear_59_cast_fp16)[name = string("x_127_cast_fp16")];
+            int32 var_1184 = const()[name = string("op_1184"), val = int32(-1)];
+            tensor<int32, [1]> var_1200_axes_0 = const()[name = string("op_1200_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(408194368)))];
+            tensor<fp16, [1280]> blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(408196992)))];
+            fp16 var_1190_to_fp16 = const()[name = string("op_1190_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1200_cast_fp16 = layer_norm(axes = var_1200_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1190_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast_fp16)[name = string("op_1200_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(408199616)))];
+            tensor<fp16, [1280]> var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(411476480)))];
+            tensor<fp16, [1, 1500, 1280]> linear_60_cast_fp16 = linear(bias = var_1212_to_fp16, weight = var_1211_to_fp16, x = var_1200_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1215_to_fp16 = const()[name = string("op_1215_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(411479104)))];
+            tensor<fp16, [1, 1500, 1280]> linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1215_to_fp16, x = var_1200_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1219_to_fp16 = const()[name = string("op_1219_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(414755968)))];
+            tensor<fp16, [1280]> var_1220_to_fp16 = const()[name = string("op_1220_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(418032832)))];
+            tensor<fp16, [1, 1500, 1280]> linear_62_cast_fp16 = linear(bias = var_1220_to_fp16, weight = var_1219_to_fp16, x = var_1200_cast_fp16)[name = string("linear_62_cast_fp16")];
+            tensor<int32, [4]> var_1228 = const()[name = string("op_1228"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1229_cast_fp16 = reshape(shape = var_1228, x = linear_60_cast_fp16)[name = string("op_1229_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_43_cast_fp16 = mul(x = var_1229_cast_fp16, y = const_244_to_fp16)[name = string("q_43_cast_fp16")];
+            tensor<int32, [4]> var_1235 = const()[name = string("op_1235"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1236_cast_fp16 = reshape(shape = var_1235, x = linear_61_cast_fp16)[name = string("op_1236_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_43_cast_fp16 = mul(x = var_1236_cast_fp16, y = const_245_to_fp16)[name = string("k_43_cast_fp16")];
+            tensor<int32, [4]> var_1242 = const()[name = string("op_1242"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1243_cast_fp16 = reshape(shape = var_1242, x = linear_62_cast_fp16)[name = string("op_1243_cast_fp16")];
+            tensor<int32, [4]> var_1244 = const()[name = string("op_1244"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_21_transpose_x_0 = const()[name = string("qk_21_transpose_x_0"), val = bool(false)];
+            bool qk_21_transpose_y_0 = const()[name = string("qk_21_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_85 = transpose(perm = transpose_85_perm_0, x = k_43_cast_fp16)[name = string("transpose_118")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = q_43_cast_fp16)[name = string("transpose_119")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_21_cast_fp16 = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_84, y = transpose_85)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1248_cast_fp16 = softmax(axis = var_1184, x = qk_21_cast_fp16)[name = string("op_1248_cast_fp16")];
+            bool var_1250_transpose_x_0 = const()[name = string("op_1250_transpose_x_0"), val = bool(false)];
+            bool var_1250_transpose_y_0 = const()[name = string("op_1250_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_43_cast_fp16 = transpose(perm = var_1244, x = var_1243_cast_fp16)[name = string("transpose_117")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1250_cast_fp16 = matmul(transpose_x = var_1250_transpose_x_0, transpose_y = var_1250_transpose_y_0, x = var_1248_cast_fp16, y = v_43_cast_fp16)[name = string("op_1250_cast_fp16")];
+            tensor<int32, [4]> var_1251 = const()[name = string("op_1251"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_10 = const()[name = string("concat_10"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1252_cast_fp16 = transpose(perm = var_1251, x = var_1250_cast_fp16)[name = string("transpose_116")];
+            tensor<fp16, [1, 1500, 1280]> x_131_cast_fp16 = reshape(shape = concat_10, x = var_1252_cast_fp16)[name = string("x_131_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1256_to_fp16 = const()[name = string("op_1256_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(418035456)))];
+            tensor<fp16, [1280]> var_1257_to_fp16 = const()[name = string("op_1257_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(421312320)))];
+            tensor<fp16, [1, 1500, 1280]> linear_63_cast_fp16 = linear(bias = var_1257_to_fp16, weight = var_1256_to_fp16, x = x_131_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_133_cast_fp16 = add(x = x_127_cast_fp16, y = linear_63_cast_fp16)[name = string("x_133_cast_fp16")];
+            tensor<int32, [1]> var_1264_axes_0 = const()[name = string("op_1264_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(421314944)))];
+            tensor<fp16, [1280]> blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(421317568)))];
+            tensor<fp16, [1, 1500, 1280]> var_1264_cast_fp16 = layer_norm(axes = var_1264_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1190_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast_fp16)[name = string("op_1264_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(421320192)))];
+            tensor<fp16, [5120]> var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(434427456)))];
+            tensor<fp16, [1, 1500, 5120]> linear_64_cast_fp16 = linear(bias = var_1274_to_fp16, weight = var_1273_to_fp16, x = var_1264_cast_fp16)[name = string("linear_64_cast_fp16")];
+            string x_137_mode_0 = const()[name = string("x_137_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_137_cast_fp16 = gelu(mode = x_137_mode_0, x = linear_64_cast_fp16)[name = string("x_137_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1279_to_fp16 = const()[name = string("op_1279_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(434437760)))];
+            tensor<fp16, [1280]> var_1280_to_fp16 = const()[name = string("op_1280_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447545024)))];
+            tensor<fp16, [1, 1500, 1280]> linear_65_cast_fp16 = linear(bias = var_1280_to_fp16, weight = var_1279_to_fp16, x = x_137_cast_fp16)[name = string("linear_65_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_139_cast_fp16 = add(x = x_133_cast_fp16, y = linear_65_cast_fp16)[name = string("x_139_cast_fp16")];
+            int32 var_1290 = const()[name = string("op_1290"), val = int32(-1)];
+            tensor<int32, [1]> var_1306_axes_0 = const()[name = string("op_1306_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447547648)))];
+            tensor<fp16, [1280]> blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447550272)))];
+            fp16 var_1296_to_fp16 = const()[name = string("op_1296_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1306_cast_fp16 = layer_norm(axes = var_1306_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1296_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast_fp16)[name = string("op_1306_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1317_to_fp16 = const()[name = string("op_1317_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447552896)))];
+            tensor<fp16, [1280]> var_1318_to_fp16 = const()[name = string("op_1318_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(450829760)))];
+            tensor<fp16, [1, 1500, 1280]> linear_66_cast_fp16 = linear(bias = var_1318_to_fp16, weight = var_1317_to_fp16, x = var_1306_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1321_to_fp16 = const()[name = string("op_1321_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(450832384)))];
+            tensor<fp16, [1, 1500, 1280]> linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1321_to_fp16, x = var_1306_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1325_to_fp16 = const()[name = string("op_1325_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(454109248)))];
+            tensor<fp16, [1280]> var_1326_to_fp16 = const()[name = string("op_1326_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(457386112)))];
+            tensor<fp16, [1, 1500, 1280]> linear_68_cast_fp16 = linear(bias = var_1326_to_fp16, weight = var_1325_to_fp16, x = var_1306_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [4]> var_1334 = const()[name = string("op_1334"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1335_cast_fp16 = reshape(shape = var_1334, x = linear_66_cast_fp16)[name = string("op_1335_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_47_cast_fp16 = mul(x = var_1335_cast_fp16, y = const_246_to_fp16)[name = string("q_47_cast_fp16")];
+            tensor<int32, [4]> var_1341 = const()[name = string("op_1341"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1342_cast_fp16 = reshape(shape = var_1341, x = linear_67_cast_fp16)[name = string("op_1342_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_47_cast_fp16 = mul(x = var_1342_cast_fp16, y = const_247_to_fp16)[name = string("k_47_cast_fp16")];
+            tensor<int32, [4]> var_1348 = const()[name = string("op_1348"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1349_cast_fp16 = reshape(shape = var_1348, x = linear_68_cast_fp16)[name = string("op_1349_cast_fp16")];
+            tensor<int32, [4]> var_1350 = const()[name = string("op_1350"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)];
+            bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_87 = transpose(perm = transpose_87_perm_0, x = k_47_cast_fp16)[name = string("transpose_114")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = q_47_cast_fp16)[name = string("transpose_115")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_86, y = transpose_87)[name = string("qk_23_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1354_cast_fp16 = softmax(axis = var_1290, x = qk_23_cast_fp16)[name = string("op_1354_cast_fp16")];
+            bool var_1356_transpose_x_0 = const()[name = string("op_1356_transpose_x_0"), val = bool(false)];
+            bool var_1356_transpose_y_0 = const()[name = string("op_1356_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_47_cast_fp16 = transpose(perm = var_1350, x = var_1349_cast_fp16)[name = string("transpose_113")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1356_cast_fp16 = matmul(transpose_x = var_1356_transpose_x_0, transpose_y = var_1356_transpose_y_0, x = var_1354_cast_fp16, y = v_47_cast_fp16)[name = string("op_1356_cast_fp16")];
+            tensor<int32, [4]> var_1357 = const()[name = string("op_1357"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1358_cast_fp16 = transpose(perm = var_1357, x = var_1356_cast_fp16)[name = string("transpose_112")];
+            tensor<fp16, [1, 1500, 1280]> x_143_cast_fp16 = reshape(shape = concat_11, x = var_1358_cast_fp16)[name = string("x_143_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(457388736)))];
+            tensor<fp16, [1280]> var_1363_to_fp16 = const()[name = string("op_1363_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460665600)))];
+            tensor<fp16, [1, 1500, 1280]> linear_69_cast_fp16 = linear(bias = var_1363_to_fp16, weight = var_1362_to_fp16, x = x_143_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_145_cast_fp16 = add(x = x_139_cast_fp16, y = linear_69_cast_fp16)[name = string("x_145_cast_fp16")];
+            tensor<int32, [1]> var_1370_axes_0 = const()[name = string("op_1370_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460668224)))];
+            tensor<fp16, [1280]> blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460670848)))];
+            tensor<fp16, [1, 1500, 1280]> var_1370_cast_fp16 = layer_norm(axes = var_1370_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1296_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast_fp16)[name = string("op_1370_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1379_to_fp16 = const()[name = string("op_1379_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460673472)))];
+            tensor<fp16, [5120]> var_1380_to_fp16 = const()[name = string("op_1380_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(473780736)))];
+            tensor<fp16, [1, 1500, 5120]> linear_70_cast_fp16 = linear(bias = var_1380_to_fp16, weight = var_1379_to_fp16, x = var_1370_cast_fp16)[name = string("linear_70_cast_fp16")];
+            string x_149_mode_0 = const()[name = string("x_149_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_149_cast_fp16 = gelu(mode = x_149_mode_0, x = linear_70_cast_fp16)[name = string("x_149_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1385_to_fp16 = const()[name = string("op_1385_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(473791040)))];
+            tensor<fp16, [1280]> var_1386_to_fp16 = const()[name = string("op_1386_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486898304)))];
+            tensor<fp16, [1, 1500, 1280]> linear_71_cast_fp16 = linear(bias = var_1386_to_fp16, weight = var_1385_to_fp16, x = x_149_cast_fp16)[name = string("linear_71_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_151_cast_fp16 = add(x = x_145_cast_fp16, y = linear_71_cast_fp16)[name = string("x_151_cast_fp16")];
+            int32 var_1396 = const()[name = string("op_1396"), val = int32(-1)];
+            tensor<int32, [1]> var_1412_axes_0 = const()[name = string("op_1412_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486900928)))];
+            tensor<fp16, [1280]> blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486903552)))];
+            fp16 var_1402_to_fp16 = const()[name = string("op_1402_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1412_cast_fp16 = layer_norm(axes = var_1412_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_1402_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_151_cast_fp16)[name = string("op_1412_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486906176)))];
+            tensor<fp16, [1280]> var_1424_to_fp16 = const()[name = string("op_1424_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(490183040)))];
+            tensor<fp16, [1, 1500, 1280]> linear_72_cast_fp16 = linear(bias = var_1424_to_fp16, weight = var_1423_to_fp16, x = var_1412_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1427_to_fp16 = const()[name = string("op_1427_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(490185664)))];
+            tensor<fp16, [1, 1500, 1280]> linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1427_to_fp16, x = var_1412_cast_fp16)[name = string("linear_73_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1431_to_fp16 = const()[name = string("op_1431_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(493462528)))];
+            tensor<fp16, [1280]> var_1432_to_fp16 = const()[name = string("op_1432_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(496739392)))];
+            tensor<fp16, [1, 1500, 1280]> linear_74_cast_fp16 = linear(bias = var_1432_to_fp16, weight = var_1431_to_fp16, x = var_1412_cast_fp16)[name = string("linear_74_cast_fp16")];
+            tensor<int32, [4]> var_1440 = const()[name = string("op_1440"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1441_cast_fp16 = reshape(shape = var_1440, x = linear_72_cast_fp16)[name = string("op_1441_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_248_to_fp16 = const()[name = string("const_248_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_51_cast_fp16 = mul(x = var_1441_cast_fp16, y = const_248_to_fp16)[name = string("q_51_cast_fp16")];
+            tensor<int32, [4]> var_1447 = const()[name = string("op_1447"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1448_cast_fp16 = reshape(shape = var_1447, x = linear_73_cast_fp16)[name = string("op_1448_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_51_cast_fp16 = mul(x = var_1448_cast_fp16, y = const_249_to_fp16)[name = string("k_51_cast_fp16")];
+            tensor<int32, [4]> var_1454 = const()[name = string("op_1454"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1455_cast_fp16 = reshape(shape = var_1454, x = linear_74_cast_fp16)[name = string("op_1455_cast_fp16")];
+            tensor<int32, [4]> var_1456 = const()[name = string("op_1456"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)];
+            bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_89 = transpose(perm = transpose_89_perm_0, x = k_51_cast_fp16)[name = string("transpose_110")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = q_51_cast_fp16)[name = string("transpose_111")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_88, y = transpose_89)[name = string("qk_25_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1460_cast_fp16 = softmax(axis = var_1396, x = qk_25_cast_fp16)[name = string("op_1460_cast_fp16")];
+            bool var_1462_transpose_x_0 = const()[name = string("op_1462_transpose_x_0"), val = bool(false)];
+            bool var_1462_transpose_y_0 = const()[name = string("op_1462_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_51_cast_fp16 = transpose(perm = var_1456, x = var_1455_cast_fp16)[name = string("transpose_109")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1462_cast_fp16 = matmul(transpose_x = var_1462_transpose_x_0, transpose_y = var_1462_transpose_y_0, x = var_1460_cast_fp16, y = v_51_cast_fp16)[name = string("op_1462_cast_fp16")];
+            tensor<int32, [4]> var_1463 = const()[name = string("op_1463"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_12 = const()[name = string("concat_12"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1464_cast_fp16 = transpose(perm = var_1463, x = var_1462_cast_fp16)[name = string("transpose_108")];
+            tensor<fp16, [1, 1500, 1280]> x_155_cast_fp16 = reshape(shape = concat_12, x = var_1464_cast_fp16)[name = string("x_155_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1468_to_fp16 = const()[name = string("op_1468_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(496742016)))];
+            tensor<fp16, [1280]> var_1469_to_fp16 = const()[name = string("op_1469_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(500018880)))];
+            tensor<fp16, [1, 1500, 1280]> linear_75_cast_fp16 = linear(bias = var_1469_to_fp16, weight = var_1468_to_fp16, x = x_155_cast_fp16)[name = string("linear_75_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_157_cast_fp16 = add(x = x_151_cast_fp16, y = linear_75_cast_fp16)[name = string("x_157_cast_fp16")];
+            tensor<int32, [1]> var_1476_axes_0 = const()[name = string("op_1476_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(500021504)))];
+            tensor<fp16, [1280]> blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(500024128)))];
+            tensor<fp16, [1, 1500, 1280]> var_1476_cast_fp16 = layer_norm(axes = var_1476_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_1402_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_157_cast_fp16)[name = string("op_1476_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1485_to_fp16 = const()[name = string("op_1485_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(500026752)))];
+            tensor<fp16, [5120]> var_1486_to_fp16 = const()[name = string("op_1486_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(513134016)))];
+            tensor<fp16, [1, 1500, 5120]> linear_76_cast_fp16 = linear(bias = var_1486_to_fp16, weight = var_1485_to_fp16, x = var_1476_cast_fp16)[name = string("linear_76_cast_fp16")];
+            string x_161_mode_0 = const()[name = string("x_161_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_161_cast_fp16 = gelu(mode = x_161_mode_0, x = linear_76_cast_fp16)[name = string("x_161_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1491_to_fp16 = const()[name = string("op_1491_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(513144320)))];
+            tensor<fp16, [1280]> var_1492_to_fp16 = const()[name = string("op_1492_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(526251584)))];
+            tensor<fp16, [1, 1500, 1280]> linear_77_cast_fp16 = linear(bias = var_1492_to_fp16, weight = var_1491_to_fp16, x = x_161_cast_fp16)[name = string("linear_77_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_163_cast_fp16 = add(x = x_157_cast_fp16, y = linear_77_cast_fp16)[name = string("x_163_cast_fp16")];
+            int32 var_1502 = const()[name = string("op_1502"), val = int32(-1)];
+            tensor<int32, [1]> var_1518_axes_0 = const()[name = string("op_1518_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(526254208)))];
+            tensor<fp16, [1280]> blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(526256832)))];
+            fp16 var_1508_to_fp16 = const()[name = string("op_1508_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1518_cast_fp16 = layer_norm(axes = var_1518_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_1508_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_163_cast_fp16)[name = string("op_1518_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1529_to_fp16 = const()[name = string("op_1529_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(526259456)))];
+            tensor<fp16, [1280]> var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(529536320)))];
+            tensor<fp16, [1, 1500, 1280]> linear_78_cast_fp16 = linear(bias = var_1530_to_fp16, weight = var_1529_to_fp16, x = var_1518_cast_fp16)[name = string("linear_78_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1533_to_fp16 = const()[name = string("op_1533_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(529538944)))];
+            tensor<fp16, [1, 1500, 1280]> linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1533_to_fp16, x = var_1518_cast_fp16)[name = string("linear_79_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1537_to_fp16 = const()[name = string("op_1537_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(532815808)))];
+            tensor<fp16, [1280]> var_1538_to_fp16 = const()[name = string("op_1538_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(536092672)))];
+            tensor<fp16, [1, 1500, 1280]> linear_80_cast_fp16 = linear(bias = var_1538_to_fp16, weight = var_1537_to_fp16, x = var_1518_cast_fp16)[name = string("linear_80_cast_fp16")];
+            tensor<int32, [4]> var_1546 = const()[name = string("op_1546"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1547_cast_fp16 = reshape(shape = var_1546, x = linear_78_cast_fp16)[name = string("op_1547_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_55_cast_fp16 = mul(x = var_1547_cast_fp16, y = const_250_to_fp16)[name = string("q_55_cast_fp16")];
+            tensor<int32, [4]> var_1553 = const()[name = string("op_1553"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1554_cast_fp16 = reshape(shape = var_1553, x = linear_79_cast_fp16)[name = string("op_1554_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_55_cast_fp16 = mul(x = var_1554_cast_fp16, y = const_251_to_fp16)[name = string("k_55_cast_fp16")];
+            tensor<int32, [4]> var_1560 = const()[name = string("op_1560"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1561_cast_fp16 = reshape(shape = var_1560, x = linear_80_cast_fp16)[name = string("op_1561_cast_fp16")];
+            tensor<int32, [4]> var_1562 = const()[name = string("op_1562"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_27_transpose_x_0 = const()[name = string("qk_27_transpose_x_0"), val = bool(false)];
+            bool qk_27_transpose_y_0 = const()[name = string("qk_27_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_91 = transpose(perm = transpose_91_perm_0, x = k_55_cast_fp16)[name = string("transpose_106")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = q_55_cast_fp16)[name = string("transpose_107")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_27_cast_fp16 = matmul(transpose_x = qk_27_transpose_x_0, transpose_y = qk_27_transpose_y_0, x = transpose_90, y = transpose_91)[name = string("qk_27_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1566_cast_fp16 = softmax(axis = var_1502, x = qk_27_cast_fp16)[name = string("op_1566_cast_fp16")];
+            bool var_1568_transpose_x_0 = const()[name = string("op_1568_transpose_x_0"), val = bool(false)];
+            bool var_1568_transpose_y_0 = const()[name = string("op_1568_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_55_cast_fp16 = transpose(perm = var_1562, x = var_1561_cast_fp16)[name = string("transpose_105")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1568_cast_fp16 = matmul(transpose_x = var_1568_transpose_x_0, transpose_y = var_1568_transpose_y_0, x = var_1566_cast_fp16, y = v_55_cast_fp16)[name = string("op_1568_cast_fp16")];
+            tensor<int32, [4]> var_1569 = const()[name = string("op_1569"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_13 = const()[name = string("concat_13"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1570_cast_fp16 = transpose(perm = var_1569, x = var_1568_cast_fp16)[name = string("transpose_104")];
+            tensor<fp16, [1, 1500, 1280]> x_167_cast_fp16 = reshape(shape = concat_13, x = var_1570_cast_fp16)[name = string("x_167_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1574_to_fp16 = const()[name = string("op_1574_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(536095296)))];
+            tensor<fp16, [1280]> var_1575_to_fp16 = const()[name = string("op_1575_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539372160)))];
+            tensor<fp16, [1, 1500, 1280]> linear_81_cast_fp16 = linear(bias = var_1575_to_fp16, weight = var_1574_to_fp16, x = x_167_cast_fp16)[name = string("linear_81_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_169_cast_fp16 = add(x = x_163_cast_fp16, y = linear_81_cast_fp16)[name = string("x_169_cast_fp16")];
+            tensor<int32, [1]> var_1582_axes_0 = const()[name = string("op_1582_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539374784)))];
+            tensor<fp16, [1280]> blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539377408)))];
+            tensor<fp16, [1, 1500, 1280]> var_1582_cast_fp16 = layer_norm(axes = var_1582_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_1508_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_169_cast_fp16)[name = string("op_1582_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1591_to_fp16 = const()[name = string("op_1591_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539380032)))];
+            tensor<fp16, [5120]> var_1592_to_fp16 = const()[name = string("op_1592_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(552487296)))];
+            tensor<fp16, [1, 1500, 5120]> linear_82_cast_fp16 = linear(bias = var_1592_to_fp16, weight = var_1591_to_fp16, x = var_1582_cast_fp16)[name = string("linear_82_cast_fp16")];
+            string x_173_mode_0 = const()[name = string("x_173_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_173_cast_fp16 = gelu(mode = x_173_mode_0, x = linear_82_cast_fp16)[name = string("x_173_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1597_to_fp16 = const()[name = string("op_1597_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(552497600)))];
+            tensor<fp16, [1280]> var_1598_to_fp16 = const()[name = string("op_1598_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565604864)))];
+            tensor<fp16, [1, 1500, 1280]> linear_83_cast_fp16 = linear(bias = var_1598_to_fp16, weight = var_1597_to_fp16, x = x_173_cast_fp16)[name = string("linear_83_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_175_cast_fp16 = add(x = x_169_cast_fp16, y = linear_83_cast_fp16)[name = string("x_175_cast_fp16")];
+            int32 var_1608 = const()[name = string("op_1608"), val = int32(-1)];
+            tensor<int32, [1]> var_1624_axes_0 = const()[name = string("op_1624_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565607488)))];
+            tensor<fp16, [1280]> blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565610112)))];
+            fp16 var_1614_to_fp16 = const()[name = string("op_1614_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1624_cast_fp16 = layer_norm(axes = var_1624_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_1614_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_175_cast_fp16)[name = string("op_1624_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1635_to_fp16 = const()[name = string("op_1635_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565612736)))];
+            tensor<fp16, [1280]> var_1636_to_fp16 = const()[name = string("op_1636_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(568889600)))];
+            tensor<fp16, [1, 1500, 1280]> linear_84_cast_fp16 = linear(bias = var_1636_to_fp16, weight = var_1635_to_fp16, x = var_1624_cast_fp16)[name = string("linear_84_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1639_to_fp16 = const()[name = string("op_1639_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(568892224)))];
+            tensor<fp16, [1, 1500, 1280]> linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1639_to_fp16, x = var_1624_cast_fp16)[name = string("linear_85_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(572169088)))];
+            tensor<fp16, [1280]> var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(575445952)))];
+            tensor<fp16, [1, 1500, 1280]> linear_86_cast_fp16 = linear(bias = var_1644_to_fp16, weight = var_1643_to_fp16, x = var_1624_cast_fp16)[name = string("linear_86_cast_fp16")];
+            tensor<int32, [4]> var_1652 = const()[name = string("op_1652"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1653_cast_fp16 = reshape(shape = var_1652, x = linear_84_cast_fp16)[name = string("op_1653_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_59_cast_fp16 = mul(x = var_1653_cast_fp16, y = const_252_to_fp16)[name = string("q_59_cast_fp16")];
+            tensor<int32, [4]> var_1659 = const()[name = string("op_1659"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1660_cast_fp16 = reshape(shape = var_1659, x = linear_85_cast_fp16)[name = string("op_1660_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_59_cast_fp16 = mul(x = var_1660_cast_fp16, y = const_253_to_fp16)[name = string("k_59_cast_fp16")];
+            tensor<int32, [4]> var_1666 = const()[name = string("op_1666"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1667_cast_fp16 = reshape(shape = var_1666, x = linear_86_cast_fp16)[name = string("op_1667_cast_fp16")];
+            tensor<int32, [4]> var_1668 = const()[name = string("op_1668"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)];
+            bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_93 = transpose(perm = transpose_93_perm_0, x = k_59_cast_fp16)[name = string("transpose_102")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = q_59_cast_fp16)[name = string("transpose_103")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_92, y = transpose_93)[name = string("qk_29_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1672_cast_fp16 = softmax(axis = var_1608, x = qk_29_cast_fp16)[name = string("op_1672_cast_fp16")];
+            bool var_1674_transpose_x_0 = const()[name = string("op_1674_transpose_x_0"), val = bool(false)];
+            bool var_1674_transpose_y_0 = const()[name = string("op_1674_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_59_cast_fp16 = transpose(perm = var_1668, x = var_1667_cast_fp16)[name = string("transpose_101")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1674_cast_fp16 = matmul(transpose_x = var_1674_transpose_x_0, transpose_y = var_1674_transpose_y_0, x = var_1672_cast_fp16, y = v_59_cast_fp16)[name = string("op_1674_cast_fp16")];
+            tensor<int32, [4]> var_1675 = const()[name = string("op_1675"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1676_cast_fp16 = transpose(perm = var_1675, x = var_1674_cast_fp16)[name = string("transpose_100")];
+            tensor<fp16, [1, 1500, 1280]> x_179_cast_fp16 = reshape(shape = concat_14, x = var_1676_cast_fp16)[name = string("x_179_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1680_to_fp16 = const()[name = string("op_1680_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(575448576)))];
+            tensor<fp16, [1280]> var_1681_to_fp16 = const()[name = string("op_1681_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578725440)))];
+            tensor<fp16, [1, 1500, 1280]> linear_87_cast_fp16 = linear(bias = var_1681_to_fp16, weight = var_1680_to_fp16, x = x_179_cast_fp16)[name = string("linear_87_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_181_cast_fp16 = add(x = x_175_cast_fp16, y = linear_87_cast_fp16)[name = string("x_181_cast_fp16")];
+            tensor<int32, [1]> var_1688_axes_0 = const()[name = string("op_1688_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578728064)))];
+            tensor<fp16, [1280]> blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578730688)))];
+            tensor<fp16, [1, 1500, 1280]> var_1688_cast_fp16 = layer_norm(axes = var_1688_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_1614_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_181_cast_fp16)[name = string("op_1688_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1697_to_fp16 = const()[name = string("op_1697_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578733312)))];
+            tensor<fp16, [5120]> var_1698_to_fp16 = const()[name = string("op_1698_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(591840576)))];
+            tensor<fp16, [1, 1500, 5120]> linear_88_cast_fp16 = linear(bias = var_1698_to_fp16, weight = var_1697_to_fp16, x = var_1688_cast_fp16)[name = string("linear_88_cast_fp16")];
+            string x_185_mode_0 = const()[name = string("x_185_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_185_cast_fp16 = gelu(mode = x_185_mode_0, x = linear_88_cast_fp16)[name = string("x_185_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1703_to_fp16 = const()[name = string("op_1703_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(591850880)))];
+            tensor<fp16, [1280]> var_1704_to_fp16 = const()[name = string("op_1704_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604958144)))];
+            tensor<fp16, [1, 1500, 1280]> linear_89_cast_fp16 = linear(bias = var_1704_to_fp16, weight = var_1703_to_fp16, x = x_185_cast_fp16)[name = string("linear_89_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_187_cast_fp16 = add(x = x_181_cast_fp16, y = linear_89_cast_fp16)[name = string("x_187_cast_fp16")];
+            int32 var_1714 = const()[name = string("op_1714"), val = int32(-1)];
+            tensor<int32, [1]> var_1730_axes_0 = const()[name = string("op_1730_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604960768)))];
+            tensor<fp16, [1280]> blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604963392)))];
+            fp16 var_1720_to_fp16 = const()[name = string("op_1720_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1730_cast_fp16 = layer_norm(axes = var_1730_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_1720_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_187_cast_fp16)[name = string("op_1730_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604966016)))];
+            tensor<fp16, [1280]> var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(608242880)))];
+            tensor<fp16, [1, 1500, 1280]> linear_90_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = var_1730_cast_fp16)[name = string("linear_90_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1745_to_fp16 = const()[name = string("op_1745_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(608245504)))];
+            tensor<fp16, [1, 1500, 1280]> linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1745_to_fp16, x = var_1730_cast_fp16)[name = string("linear_91_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1749_to_fp16 = const()[name = string("op_1749_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(611522368)))];
+            tensor<fp16, [1280]> var_1750_to_fp16 = const()[name = string("op_1750_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614799232)))];
+            tensor<fp16, [1, 1500, 1280]> linear_92_cast_fp16 = linear(bias = var_1750_to_fp16, weight = var_1749_to_fp16, x = var_1730_cast_fp16)[name = string("linear_92_cast_fp16")];
+            tensor<int32, [4]> var_1758 = const()[name = string("op_1758"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1759_cast_fp16 = reshape(shape = var_1758, x = linear_90_cast_fp16)[name = string("op_1759_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_254_to_fp16 = const()[name = string("const_254_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_63_cast_fp16 = mul(x = var_1759_cast_fp16, y = const_254_to_fp16)[name = string("q_63_cast_fp16")];
+            tensor<int32, [4]> var_1765 = const()[name = string("op_1765"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1766_cast_fp16 = reshape(shape = var_1765, x = linear_91_cast_fp16)[name = string("op_1766_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_255_to_fp16 = const()[name = string("const_255_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_63_cast_fp16 = mul(x = var_1766_cast_fp16, y = const_255_to_fp16)[name = string("k_63_cast_fp16")];
+            tensor<int32, [4]> var_1772 = const()[name = string("op_1772"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1773_cast_fp16 = reshape(shape = var_1772, x = linear_92_cast_fp16)[name = string("op_1773_cast_fp16")];
+            tensor<int32, [4]> var_1774 = const()[name = string("op_1774"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)];
+            bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_95 = transpose(perm = transpose_95_perm_0, x = k_63_cast_fp16)[name = string("transpose_98")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = q_63_cast_fp16)[name = string("transpose_99")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_94, y = transpose_95)[name = string("qk_31_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1778_cast_fp16 = softmax(axis = var_1714, x = qk_31_cast_fp16)[name = string("op_1778_cast_fp16")];
+            bool var_1780_transpose_x_0 = const()[name = string("op_1780_transpose_x_0"), val = bool(false)];
+            bool var_1780_transpose_y_0 = const()[name = string("op_1780_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_63_cast_fp16 = transpose(perm = var_1774, x = var_1773_cast_fp16)[name = string("transpose_97")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1780_cast_fp16 = matmul(transpose_x = var_1780_transpose_x_0, transpose_y = var_1780_transpose_y_0, x = var_1778_cast_fp16, y = v_63_cast_fp16)[name = string("op_1780_cast_fp16")];
+            tensor<int32, [4]> var_1781 = const()[name = string("op_1781"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_15 = const()[name = string("concat_15"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1782_cast_fp16 = transpose(perm = var_1781, x = var_1780_cast_fp16)[name = string("transpose_96")];
+            tensor<fp16, [1, 1500, 1280]> x_191_cast_fp16 = reshape(shape = concat_15, x = var_1782_cast_fp16)[name = string("x_191_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1786_to_fp16 = const()[name = string("op_1786_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614801856)))];
+            tensor<fp16, [1280]> var_1787_to_fp16 = const()[name = string("op_1787_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(618078720)))];
+            tensor<fp16, [1, 1500, 1280]> linear_93_cast_fp16 = linear(bias = var_1787_to_fp16, weight = var_1786_to_fp16, x = x_191_cast_fp16)[name = string("linear_93_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_193_cast_fp16_1 = add(x = x_187_cast_fp16, y = linear_93_cast_fp16)[name = string("x_193_cast_fp16")];
+            tensor<int32, [1]> var_1794_axes_0 = const()[name = string("op_1794_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(618081344)))];
+            tensor<fp16, [1280]> blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(618083968)))];
+            tensor<fp16, [1, 1500, 1280]> var_1794_cast_fp16 = layer_norm(axes = var_1794_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_1720_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_193_cast_fp16_1)[name = string("op_1794_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1803_to_fp16 = const()[name = string("op_1803_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(618086592)))];
+            tensor<fp16, [5120]> var_1804_to_fp16 = const()[name = string("op_1804_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(631193856)))];
+            tensor<fp16, [1, 1500, 5120]> linear_94_cast_fp16 = linear(bias = var_1804_to_fp16, weight = var_1803_to_fp16, x = var_1794_cast_fp16)[name = string("linear_94_cast_fp16")];
+            string x_197_mode_0 = const()[name = string("x_197_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_197_cast_fp16 = gelu(mode = x_197_mode_0, x = linear_94_cast_fp16)[name = string("x_197_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1809_to_fp16 = const()[name = string("op_1809_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(631204160)))];
+            tensor<fp16, [1280]> var_1810_to_fp16 = const()[name = string("op_1810_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(644311424)))];
+            tensor<fp16, [1, 1500, 1280]> linear_95_cast_fp16_1 = linear(bias = var_1810_to_fp16, weight = var_1809_to_fp16, x = x_197_cast_fp16)[name = string("linear_95_cast_fp16")];
+            string linear_95_cast_fp16_dtype_0 = const()[name = string("linear_95_cast_fp16_dtype_0"), val = string("fp32")];
+            string x_193_cast_fp16_dtype_0 = const()[name = string("x_193_cast_fp16_dtype_0"), val = string("fp32")];
+            tensor<fp32, [1, 1500, 1280]> x_193_cast_fp16 = cast(dtype = x_193_cast_fp16_dtype_0, x = x_193_cast_fp16_1)[name = string("cast_2")];
+            tensor<fp32, [1, 1500, 1280]> linear_95_cast_fp16 = cast(dtype = linear_95_cast_fp16_dtype_0, x = linear_95_cast_fp16_1)[name = string("cast_3")];
+        } -> (linear_95_cast_fp16, x_193_cast_fp16);
+}
\ No newline at end of file
diff --git a/large-v3/encoder.mlmodelc/model0/weights/0-weight.bin b/large-v3/encoder.mlmodelc/model0/weights/0-weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..86bc91dc5f3c78b8d21adbe8207e7316052e5112
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/model0/weights/0-weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:593ca90475cb8284e5c5a84580932a05e0cc3fe872e4804b4b1f6d7fba16c21c
+size 644314048
diff --git a/large-v3/encoder.mlmodelc/model1/analytics/coremldata.bin b/large-v3/encoder.mlmodelc/model1/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ed18ae44ab3d09ffbed846536c84109f12b19b1
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/model1/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a8281049b2a65a3be541cfd9f949e84b8fe1c5251ce90e46da1626fed54e58a
+size 108
diff --git a/large-v3/encoder.mlmodelc/model1/coremldata.bin b/large-v3/encoder.mlmodelc/model1/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2ea59338ab416594015715ac6994e32a8c96e239
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/model1/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70195139816248a2b1fbef695f96decb60b35af6f364f84a7d2293a3d0a09e11
+size 196
diff --git a/large-v3/encoder.mlmodelc/model1/model.mil b/large-v3/encoder.mlmodelc/model1/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..7d3b026fa91fad416f7820629ab7ce05c46aad69
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/model1/model.mil
@@ -0,0 +1,945 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<fp32, [1, 1500, 1280]> linear_95_cast_fp16, tensor<fp32, [1, 1500, 1280]> x_193_cast_fp16) {
+            tensor<fp16, [1280]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(64)))];
+            string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("fp16")];
+            string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("fp16")];
+            tensor<fp16, [1, 1500, 1280]> cast_0 = cast(dtype = cast_0_dtype_0, x = linear_95_cast_fp16)[name = string("cast_0")];
+            tensor<fp16, [1, 1500, 1280]> cast_1 = cast(dtype = cast_1_dtype_0, x = x_193_cast_fp16)[name = string("cast_1")];
+            tensor<fp16, [1, 1500, 1280]> x_199_cast_fp16 = add(x = cast_1, y = cast_0)[name = string("x_199_cast_fp16")];
+            int32 var_1820 = const()[name = string("op_1820"), val = int32(-1)];
+            tensor<int32, [1]> var_1836_axes_0 = const()[name = string("op_1836_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(2688)))];
+            tensor<fp16, [1280]> blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(5312)))];
+            fp16 var_1826_to_fp16 = const()[name = string("op_1826_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1836_cast_fp16 = layer_norm(axes = var_1836_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_1826_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_199_cast_fp16)[name = string("op_1836_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1847_to_fp16 = const()[name = string("op_1847_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(7936)))];
+            tensor<fp16, [1280]> var_1848_to_fp16 = const()[name = string("op_1848_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(3284800)))];
+            tensor<fp16, [1, 1500, 1280]> linear_96_cast_fp16 = linear(bias = var_1848_to_fp16, weight = var_1847_to_fp16, x = var_1836_cast_fp16)[name = string("linear_96_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1851_to_fp16 = const()[name = string("op_1851_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(3287424)))];
+            tensor<fp16, [1, 1500, 1280]> linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1851_to_fp16, x = var_1836_cast_fp16)[name = string("linear_97_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1855_to_fp16 = const()[name = string("op_1855_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(6564288)))];
+            tensor<fp16, [1280]> var_1856_to_fp16 = const()[name = string("op_1856_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(9841152)))];
+            tensor<fp16, [1, 1500, 1280]> linear_98_cast_fp16 = linear(bias = var_1856_to_fp16, weight = var_1855_to_fp16, x = var_1836_cast_fp16)[name = string("linear_98_cast_fp16")];
+            tensor<int32, [4]> var_1864 = const()[name = string("op_1864"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1865_cast_fp16 = reshape(shape = var_1864, x = linear_96_cast_fp16)[name = string("op_1865_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_67_cast_fp16 = mul(x = var_1865_cast_fp16, y = const_256_to_fp16)[name = string("q_67_cast_fp16")];
+            tensor<int32, [4]> var_1871 = const()[name = string("op_1871"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1872_cast_fp16 = reshape(shape = var_1871, x = linear_97_cast_fp16)[name = string("op_1872_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_257_to_fp16 = const()[name = string("const_257_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_67_cast_fp16 = mul(x = var_1872_cast_fp16, y = const_257_to_fp16)[name = string("k_67_cast_fp16")];
+            tensor<int32, [4]> var_1878 = const()[name = string("op_1878"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1879_cast_fp16 = reshape(shape = var_1878, x = linear_98_cast_fp16)[name = string("op_1879_cast_fp16")];
+            tensor<int32, [4]> var_1880 = const()[name = string("op_1880"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_33_transpose_x_0 = const()[name = string("qk_33_transpose_x_0"), val = bool(false)];
+            bool qk_33_transpose_y_0 = const()[name = string("qk_33_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_65 = transpose(perm = transpose_65_perm_0, x = k_67_cast_fp16)[name = string("transpose_158")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_64 = transpose(perm = transpose_64_perm_0, x = q_67_cast_fp16)[name = string("transpose_159")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_33_cast_fp16 = matmul(transpose_x = qk_33_transpose_x_0, transpose_y = qk_33_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_33_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1884_cast_fp16 = softmax(axis = var_1820, x = qk_33_cast_fp16)[name = string("op_1884_cast_fp16")];
+            bool var_1886_transpose_x_0 = const()[name = string("op_1886_transpose_x_0"), val = bool(false)];
+            bool var_1886_transpose_y_0 = const()[name = string("op_1886_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_67_cast_fp16 = transpose(perm = var_1880, x = var_1879_cast_fp16)[name = string("transpose_157")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1886_cast_fp16 = matmul(transpose_x = var_1886_transpose_x_0, transpose_y = var_1886_transpose_y_0, x = var_1884_cast_fp16, y = v_67_cast_fp16)[name = string("op_1886_cast_fp16")];
+            tensor<int32, [4]> var_1887 = const()[name = string("op_1887"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_16 = const()[name = string("concat_16"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1888_cast_fp16 = transpose(perm = var_1887, x = var_1886_cast_fp16)[name = string("transpose_156")];
+            tensor<fp16, [1, 1500, 1280]> x_203_cast_fp16 = reshape(shape = concat_16, x = var_1888_cast_fp16)[name = string("x_203_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1892_to_fp16 = const()[name = string("op_1892_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(9843776)))];
+            tensor<fp16, [1280]> var_1893_to_fp16 = const()[name = string("op_1893_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13120640)))];
+            tensor<fp16, [1, 1500, 1280]> linear_99_cast_fp16 = linear(bias = var_1893_to_fp16, weight = var_1892_to_fp16, x = x_203_cast_fp16)[name = string("linear_99_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_205_cast_fp16 = add(x = x_199_cast_fp16, y = linear_99_cast_fp16)[name = string("x_205_cast_fp16")];
+            tensor<int32, [1]> var_1900_axes_0 = const()[name = string("op_1900_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13123264)))];
+            tensor<fp16, [1280]> blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13125888)))];
+            tensor<fp16, [1, 1500, 1280]> var_1900_cast_fp16 = layer_norm(axes = var_1900_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_1826_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_205_cast_fp16)[name = string("op_1900_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_1909_to_fp16 = const()[name = string("op_1909_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13128512)))];
+            tensor<fp16, [5120]> var_1910_to_fp16 = const()[name = string("op_1910_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(26235776)))];
+            tensor<fp16, [1, 1500, 5120]> linear_100_cast_fp16 = linear(bias = var_1910_to_fp16, weight = var_1909_to_fp16, x = var_1900_cast_fp16)[name = string("linear_100_cast_fp16")];
+            string x_209_mode_0 = const()[name = string("x_209_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_209_cast_fp16 = gelu(mode = x_209_mode_0, x = linear_100_cast_fp16)[name = string("x_209_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(26246080)))];
+            tensor<fp16, [1280]> var_1916_to_fp16 = const()[name = string("op_1916_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39353344)))];
+            tensor<fp16, [1, 1500, 1280]> linear_101_cast_fp16 = linear(bias = var_1916_to_fp16, weight = var_1915_to_fp16, x = x_209_cast_fp16)[name = string("linear_101_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_211_cast_fp16 = add(x = x_205_cast_fp16, y = linear_101_cast_fp16)[name = string("x_211_cast_fp16")];
+            int32 var_1926 = const()[name = string("op_1926"), val = int32(-1)];
+            tensor<int32, [1]> var_1942_axes_0 = const()[name = string("op_1942_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39355968)))];
+            tensor<fp16, [1280]> blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39358592)))];
+            fp16 var_1932_to_fp16 = const()[name = string("op_1932_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_1942_cast_fp16 = layer_norm(axes = var_1942_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_1932_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_211_cast_fp16)[name = string("op_1942_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39361216)))];
+            tensor<fp16, [1280]> var_1954_to_fp16 = const()[name = string("op_1954_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(42638080)))];
+            tensor<fp16, [1, 1500, 1280]> linear_102_cast_fp16 = linear(bias = var_1954_to_fp16, weight = var_1953_to_fp16, x = var_1942_cast_fp16)[name = string("linear_102_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(42640704)))];
+            tensor<fp16, [1, 1500, 1280]> linear_103_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1957_to_fp16, x = var_1942_cast_fp16)[name = string("linear_103_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1961_to_fp16 = const()[name = string("op_1961_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(45917568)))];
+            tensor<fp16, [1280]> var_1962_to_fp16 = const()[name = string("op_1962_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(49194432)))];
+            tensor<fp16, [1, 1500, 1280]> linear_104_cast_fp16 = linear(bias = var_1962_to_fp16, weight = var_1961_to_fp16, x = var_1942_cast_fp16)[name = string("linear_104_cast_fp16")];
+            tensor<int32, [4]> var_1970 = const()[name = string("op_1970"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1971_cast_fp16 = reshape(shape = var_1970, x = linear_102_cast_fp16)[name = string("op_1971_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_71_cast_fp16 = mul(x = var_1971_cast_fp16, y = const_258_to_fp16)[name = string("q_71_cast_fp16")];
+            tensor<int32, [4]> var_1977 = const()[name = string("op_1977"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1978_cast_fp16 = reshape(shape = var_1977, x = linear_103_cast_fp16)[name = string("op_1978_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_71_cast_fp16 = mul(x = var_1978_cast_fp16, y = const_259_to_fp16)[name = string("k_71_cast_fp16")];
+            tensor<int32, [4]> var_1984 = const()[name = string("op_1984"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1985_cast_fp16 = reshape(shape = var_1984, x = linear_104_cast_fp16)[name = string("op_1985_cast_fp16")];
+            tensor<int32, [4]> var_1986 = const()[name = string("op_1986"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)];
+            bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_67 = transpose(perm = transpose_67_perm_0, x = k_71_cast_fp16)[name = string("transpose_154")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_66 = transpose(perm = transpose_66_perm_0, x = q_71_cast_fp16)[name = string("transpose_155")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_35_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1990_cast_fp16 = softmax(axis = var_1926, x = qk_35_cast_fp16)[name = string("op_1990_cast_fp16")];
+            bool var_1992_transpose_x_0 = const()[name = string("op_1992_transpose_x_0"), val = bool(false)];
+            bool var_1992_transpose_y_0 = const()[name = string("op_1992_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_71_cast_fp16 = transpose(perm = var_1986, x = var_1985_cast_fp16)[name = string("transpose_153")];
+            tensor<fp16, [1, 20, 1500, 64]> var_1992_cast_fp16 = matmul(transpose_x = var_1992_transpose_x_0, transpose_y = var_1992_transpose_y_0, x = var_1990_cast_fp16, y = v_71_cast_fp16)[name = string("op_1992_cast_fp16")];
+            tensor<int32, [4]> var_1993 = const()[name = string("op_1993"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_1994_cast_fp16 = transpose(perm = var_1993, x = var_1992_cast_fp16)[name = string("transpose_152")];
+            tensor<fp16, [1, 1500, 1280]> x_215_cast_fp16 = reshape(shape = concat_17, x = var_1994_cast_fp16)[name = string("x_215_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_1998_to_fp16 = const()[name = string("op_1998_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(49197056)))];
+            tensor<fp16, [1280]> var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52473920)))];
+            tensor<fp16, [1, 1500, 1280]> linear_105_cast_fp16 = linear(bias = var_1999_to_fp16, weight = var_1998_to_fp16, x = x_215_cast_fp16)[name = string("linear_105_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_217_cast_fp16 = add(x = x_211_cast_fp16, y = linear_105_cast_fp16)[name = string("x_217_cast_fp16")];
+            tensor<int32, [1]> var_2006_axes_0 = const()[name = string("op_2006_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52476544)))];
+            tensor<fp16, [1280]> blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52479168)))];
+            tensor<fp16, [1, 1500, 1280]> var_2006_cast_fp16 = layer_norm(axes = var_2006_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_1932_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_217_cast_fp16)[name = string("op_2006_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2015_to_fp16 = const()[name = string("op_2015_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52481792)))];
+            tensor<fp16, [5120]> var_2016_to_fp16 = const()[name = string("op_2016_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(65589056)))];
+            tensor<fp16, [1, 1500, 5120]> linear_106_cast_fp16 = linear(bias = var_2016_to_fp16, weight = var_2015_to_fp16, x = var_2006_cast_fp16)[name = string("linear_106_cast_fp16")];
+            string x_221_mode_0 = const()[name = string("x_221_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_221_cast_fp16 = gelu(mode = x_221_mode_0, x = linear_106_cast_fp16)[name = string("x_221_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2021_to_fp16 = const()[name = string("op_2021_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(65599360)))];
+            tensor<fp16, [1280]> var_2022_to_fp16 = const()[name = string("op_2022_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78706624)))];
+            tensor<fp16, [1, 1500, 1280]> linear_107_cast_fp16 = linear(bias = var_2022_to_fp16, weight = var_2021_to_fp16, x = x_221_cast_fp16)[name = string("linear_107_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_223_cast_fp16 = add(x = x_217_cast_fp16, y = linear_107_cast_fp16)[name = string("x_223_cast_fp16")];
+            int32 var_2032 = const()[name = string("op_2032"), val = int32(-1)];
+            tensor<int32, [1]> var_2048_axes_0 = const()[name = string("op_2048_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78709248)))];
+            tensor<fp16, [1280]> blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78711872)))];
+            fp16 var_2038_to_fp16 = const()[name = string("op_2038_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2048_cast_fp16 = layer_norm(axes = var_2048_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_2038_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_223_cast_fp16)[name = string("op_2048_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2059_to_fp16 = const()[name = string("op_2059_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78714496)))];
+            tensor<fp16, [1280]> var_2060_to_fp16 = const()[name = string("op_2060_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(81991360)))];
+            tensor<fp16, [1, 1500, 1280]> linear_108_cast_fp16 = linear(bias = var_2060_to_fp16, weight = var_2059_to_fp16, x = var_2048_cast_fp16)[name = string("linear_108_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2063_to_fp16 = const()[name = string("op_2063_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(81993984)))];
+            tensor<fp16, [1, 1500, 1280]> linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2063_to_fp16, x = var_2048_cast_fp16)[name = string("linear_109_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2067_to_fp16 = const()[name = string("op_2067_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(85270848)))];
+            tensor<fp16, [1280]> var_2068_to_fp16 = const()[name = string("op_2068_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(88547712)))];
+            tensor<fp16, [1, 1500, 1280]> linear_110_cast_fp16 = linear(bias = var_2068_to_fp16, weight = var_2067_to_fp16, x = var_2048_cast_fp16)[name = string("linear_110_cast_fp16")];
+            tensor<int32, [4]> var_2076 = const()[name = string("op_2076"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2077_cast_fp16 = reshape(shape = var_2076, x = linear_108_cast_fp16)[name = string("op_2077_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_75_cast_fp16 = mul(x = var_2077_cast_fp16, y = const_260_to_fp16)[name = string("q_75_cast_fp16")];
+            tensor<int32, [4]> var_2083 = const()[name = string("op_2083"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2084_cast_fp16 = reshape(shape = var_2083, x = linear_109_cast_fp16)[name = string("op_2084_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_75_cast_fp16 = mul(x = var_2084_cast_fp16, y = const_261_to_fp16)[name = string("k_75_cast_fp16")];
+            tensor<int32, [4]> var_2090 = const()[name = string("op_2090"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2091_cast_fp16 = reshape(shape = var_2090, x = linear_110_cast_fp16)[name = string("op_2091_cast_fp16")];
+            tensor<int32, [4]> var_2092 = const()[name = string("op_2092"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)];
+            bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_69 = transpose(perm = transpose_69_perm_0, x = k_75_cast_fp16)[name = string("transpose_150")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_68 = transpose(perm = transpose_68_perm_0, x = q_75_cast_fp16)[name = string("transpose_151")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_37_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2096_cast_fp16 = softmax(axis = var_2032, x = qk_37_cast_fp16)[name = string("op_2096_cast_fp16")];
+            bool var_2098_transpose_x_0 = const()[name = string("op_2098_transpose_x_0"), val = bool(false)];
+            bool var_2098_transpose_y_0 = const()[name = string("op_2098_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_75_cast_fp16 = transpose(perm = var_2092, x = var_2091_cast_fp16)[name = string("transpose_149")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2098_cast_fp16 = matmul(transpose_x = var_2098_transpose_x_0, transpose_y = var_2098_transpose_y_0, x = var_2096_cast_fp16, y = v_75_cast_fp16)[name = string("op_2098_cast_fp16")];
+            tensor<int32, [4]> var_2099 = const()[name = string("op_2099"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2100_cast_fp16 = transpose(perm = var_2099, x = var_2098_cast_fp16)[name = string("transpose_148")];
+            tensor<fp16, [1, 1500, 1280]> x_227_cast_fp16 = reshape(shape = concat_18, x = var_2100_cast_fp16)[name = string("x_227_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2104_to_fp16 = const()[name = string("op_2104_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(88550336)))];
+            tensor<fp16, [1280]> var_2105_to_fp16 = const()[name = string("op_2105_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91827200)))];
+            tensor<fp16, [1, 1500, 1280]> linear_111_cast_fp16 = linear(bias = var_2105_to_fp16, weight = var_2104_to_fp16, x = x_227_cast_fp16)[name = string("linear_111_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_229_cast_fp16 = add(x = x_223_cast_fp16, y = linear_111_cast_fp16)[name = string("x_229_cast_fp16")];
+            tensor<int32, [1]> var_2112_axes_0 = const()[name = string("op_2112_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91829824)))];
+            tensor<fp16, [1280]> blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91832448)))];
+            tensor<fp16, [1, 1500, 1280]> var_2112_cast_fp16 = layer_norm(axes = var_2112_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_2038_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_229_cast_fp16)[name = string("op_2112_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2121_to_fp16 = const()[name = string("op_2121_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91835072)))];
+            tensor<fp16, [5120]> var_2122_to_fp16 = const()[name = string("op_2122_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(104942336)))];
+            tensor<fp16, [1, 1500, 5120]> linear_112_cast_fp16 = linear(bias = var_2122_to_fp16, weight = var_2121_to_fp16, x = var_2112_cast_fp16)[name = string("linear_112_cast_fp16")];
+            string x_233_mode_0 = const()[name = string("x_233_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_233_cast_fp16 = gelu(mode = x_233_mode_0, x = linear_112_cast_fp16)[name = string("x_233_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2127_to_fp16 = const()[name = string("op_2127_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(104952640)))];
+            tensor<fp16, [1280]> var_2128_to_fp16 = const()[name = string("op_2128_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118059904)))];
+            tensor<fp16, [1, 1500, 1280]> linear_113_cast_fp16 = linear(bias = var_2128_to_fp16, weight = var_2127_to_fp16, x = x_233_cast_fp16)[name = string("linear_113_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_235_cast_fp16 = add(x = x_229_cast_fp16, y = linear_113_cast_fp16)[name = string("x_235_cast_fp16")];
+            int32 var_2138 = const()[name = string("op_2138"), val = int32(-1)];
+            tensor<int32, [1]> var_2154_axes_0 = const()[name = string("op_2154_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118062528)))];
+            tensor<fp16, [1280]> blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118065152)))];
+            fp16 var_2144_to_fp16 = const()[name = string("op_2144_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2154_cast_fp16 = layer_norm(axes = var_2154_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_2144_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_235_cast_fp16)[name = string("op_2154_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2165_to_fp16 = const()[name = string("op_2165_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118067776)))];
+            tensor<fp16, [1280]> var_2166_to_fp16 = const()[name = string("op_2166_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(121344640)))];
+            tensor<fp16, [1, 1500, 1280]> linear_114_cast_fp16 = linear(bias = var_2166_to_fp16, weight = var_2165_to_fp16, x = var_2154_cast_fp16)[name = string("linear_114_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2169_to_fp16 = const()[name = string("op_2169_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(121347264)))];
+            tensor<fp16, [1, 1500, 1280]> linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2169_to_fp16, x = var_2154_cast_fp16)[name = string("linear_115_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2173_to_fp16 = const()[name = string("op_2173_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(124624128)))];
+            tensor<fp16, [1280]> var_2174_to_fp16 = const()[name = string("op_2174_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(127900992)))];
+            tensor<fp16, [1, 1500, 1280]> linear_116_cast_fp16 = linear(bias = var_2174_to_fp16, weight = var_2173_to_fp16, x = var_2154_cast_fp16)[name = string("linear_116_cast_fp16")];
+            tensor<int32, [4]> var_2182 = const()[name = string("op_2182"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2183_cast_fp16 = reshape(shape = var_2182, x = linear_114_cast_fp16)[name = string("op_2183_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_79_cast_fp16 = mul(x = var_2183_cast_fp16, y = const_262_to_fp16)[name = string("q_79_cast_fp16")];
+            tensor<int32, [4]> var_2189 = const()[name = string("op_2189"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2190_cast_fp16 = reshape(shape = var_2189, x = linear_115_cast_fp16)[name = string("op_2190_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_79_cast_fp16 = mul(x = var_2190_cast_fp16, y = const_263_to_fp16)[name = string("k_79_cast_fp16")];
+            tensor<int32, [4]> var_2196 = const()[name = string("op_2196"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2197_cast_fp16 = reshape(shape = var_2196, x = linear_116_cast_fp16)[name = string("op_2197_cast_fp16")];
+            tensor<int32, [4]> var_2198 = const()[name = string("op_2198"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_39_transpose_x_0 = const()[name = string("qk_39_transpose_x_0"), val = bool(false)];
+            bool qk_39_transpose_y_0 = const()[name = string("qk_39_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_71 = transpose(perm = transpose_71_perm_0, x = k_79_cast_fp16)[name = string("transpose_146")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_70 = transpose(perm = transpose_70_perm_0, x = q_79_cast_fp16)[name = string("transpose_147")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_39_cast_fp16 = matmul(transpose_x = qk_39_transpose_x_0, transpose_y = qk_39_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_39_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2202_cast_fp16 = softmax(axis = var_2138, x = qk_39_cast_fp16)[name = string("op_2202_cast_fp16")];
+            bool var_2204_transpose_x_0 = const()[name = string("op_2204_transpose_x_0"), val = bool(false)];
+            bool var_2204_transpose_y_0 = const()[name = string("op_2204_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_79_cast_fp16 = transpose(perm = var_2198, x = var_2197_cast_fp16)[name = string("transpose_145")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2204_cast_fp16 = matmul(transpose_x = var_2204_transpose_x_0, transpose_y = var_2204_transpose_y_0, x = var_2202_cast_fp16, y = v_79_cast_fp16)[name = string("op_2204_cast_fp16")];
+            tensor<int32, [4]> var_2205 = const()[name = string("op_2205"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2206_cast_fp16 = transpose(perm = var_2205, x = var_2204_cast_fp16)[name = string("transpose_144")];
+            tensor<fp16, [1, 1500, 1280]> x_239_cast_fp16 = reshape(shape = concat_19, x = var_2206_cast_fp16)[name = string("x_239_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2210_to_fp16 = const()[name = string("op_2210_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(127903616)))];
+            tensor<fp16, [1280]> var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131180480)))];
+            tensor<fp16, [1, 1500, 1280]> linear_117_cast_fp16 = linear(bias = var_2211_to_fp16, weight = var_2210_to_fp16, x = x_239_cast_fp16)[name = string("linear_117_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_241_cast_fp16 = add(x = x_235_cast_fp16, y = linear_117_cast_fp16)[name = string("x_241_cast_fp16")];
+            tensor<int32, [1]> var_2218_axes_0 = const()[name = string("op_2218_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131183104)))];
+            tensor<fp16, [1280]> blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131185728)))];
+            tensor<fp16, [1, 1500, 1280]> var_2218_cast_fp16 = layer_norm(axes = var_2218_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_2144_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_241_cast_fp16)[name = string("op_2218_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2227_to_fp16 = const()[name = string("op_2227_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131188352)))];
+            tensor<fp16, [5120]> var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(144295616)))];
+            tensor<fp16, [1, 1500, 5120]> linear_118_cast_fp16 = linear(bias = var_2228_to_fp16, weight = var_2227_to_fp16, x = var_2218_cast_fp16)[name = string("linear_118_cast_fp16")];
+            string x_245_mode_0 = const()[name = string("x_245_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_245_cast_fp16 = gelu(mode = x_245_mode_0, x = linear_118_cast_fp16)[name = string("x_245_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2233_to_fp16 = const()[name = string("op_2233_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(144305920)))];
+            tensor<fp16, [1280]> var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157413184)))];
+            tensor<fp16, [1, 1500, 1280]> linear_119_cast_fp16 = linear(bias = var_2234_to_fp16, weight = var_2233_to_fp16, x = x_245_cast_fp16)[name = string("linear_119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_247_cast_fp16 = add(x = x_241_cast_fp16, y = linear_119_cast_fp16)[name = string("x_247_cast_fp16")];
+            int32 var_2244 = const()[name = string("op_2244"), val = int32(-1)];
+            tensor<int32, [1]> var_2260_axes_0 = const()[name = string("op_2260_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157415808)))];
+            tensor<fp16, [1280]> blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157418432)))];
+            fp16 var_2250_to_fp16 = const()[name = string("op_2250_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2260_cast_fp16 = layer_norm(axes = var_2260_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_2250_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_247_cast_fp16)[name = string("op_2260_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2271_to_fp16 = const()[name = string("op_2271_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157421056)))];
+            tensor<fp16, [1280]> var_2272_to_fp16 = const()[name = string("op_2272_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(160697920)))];
+            tensor<fp16, [1, 1500, 1280]> linear_120_cast_fp16 = linear(bias = var_2272_to_fp16, weight = var_2271_to_fp16, x = var_2260_cast_fp16)[name = string("linear_120_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2275_to_fp16 = const()[name = string("op_2275_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(160700544)))];
+            tensor<fp16, [1, 1500, 1280]> linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2275_to_fp16, x = var_2260_cast_fp16)[name = string("linear_121_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2279_to_fp16 = const()[name = string("op_2279_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(163977408)))];
+            tensor<fp16, [1280]> var_2280_to_fp16 = const()[name = string("op_2280_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(167254272)))];
+            tensor<fp16, [1, 1500, 1280]> linear_122_cast_fp16 = linear(bias = var_2280_to_fp16, weight = var_2279_to_fp16, x = var_2260_cast_fp16)[name = string("linear_122_cast_fp16")];
+            tensor<int32, [4]> var_2288 = const()[name = string("op_2288"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2289_cast_fp16 = reshape(shape = var_2288, x = linear_120_cast_fp16)[name = string("op_2289_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_264_to_fp16 = const()[name = string("const_264_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_83_cast_fp16 = mul(x = var_2289_cast_fp16, y = const_264_to_fp16)[name = string("q_83_cast_fp16")];
+            tensor<int32, [4]> var_2295 = const()[name = string("op_2295"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2296_cast_fp16 = reshape(shape = var_2295, x = linear_121_cast_fp16)[name = string("op_2296_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_83_cast_fp16 = mul(x = var_2296_cast_fp16, y = const_265_to_fp16)[name = string("k_83_cast_fp16")];
+            tensor<int32, [4]> var_2302 = const()[name = string("op_2302"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2303_cast_fp16 = reshape(shape = var_2302, x = linear_122_cast_fp16)[name = string("op_2303_cast_fp16")];
+            tensor<int32, [4]> var_2304 = const()[name = string("op_2304"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)];
+            bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_73 = transpose(perm = transpose_73_perm_0, x = k_83_cast_fp16)[name = string("transpose_142")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = q_83_cast_fp16)[name = string("transpose_143")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_72, y = transpose_73)[name = string("qk_41_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2308_cast_fp16 = softmax(axis = var_2244, x = qk_41_cast_fp16)[name = string("op_2308_cast_fp16")];
+            bool var_2310_transpose_x_0 = const()[name = string("op_2310_transpose_x_0"), val = bool(false)];
+            bool var_2310_transpose_y_0 = const()[name = string("op_2310_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_83_cast_fp16 = transpose(perm = var_2304, x = var_2303_cast_fp16)[name = string("transpose_141")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2310_cast_fp16 = matmul(transpose_x = var_2310_transpose_x_0, transpose_y = var_2310_transpose_y_0, x = var_2308_cast_fp16, y = v_83_cast_fp16)[name = string("op_2310_cast_fp16")];
+            tensor<int32, [4]> var_2311 = const()[name = string("op_2311"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2312_cast_fp16 = transpose(perm = var_2311, x = var_2310_cast_fp16)[name = string("transpose_140")];
+            tensor<fp16, [1, 1500, 1280]> x_251_cast_fp16 = reshape(shape = concat_20, x = var_2312_cast_fp16)[name = string("x_251_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2316_to_fp16 = const()[name = string("op_2316_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(167256896)))];
+            tensor<fp16, [1280]> var_2317_to_fp16 = const()[name = string("op_2317_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170533760)))];
+            tensor<fp16, [1, 1500, 1280]> linear_123_cast_fp16 = linear(bias = var_2317_to_fp16, weight = var_2316_to_fp16, x = x_251_cast_fp16)[name = string("linear_123_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_253_cast_fp16 = add(x = x_247_cast_fp16, y = linear_123_cast_fp16)[name = string("x_253_cast_fp16")];
+            tensor<int32, [1]> var_2324_axes_0 = const()[name = string("op_2324_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170536384)))];
+            tensor<fp16, [1280]> blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170539008)))];
+            tensor<fp16, [1, 1500, 1280]> var_2324_cast_fp16 = layer_norm(axes = var_2324_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_2250_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_253_cast_fp16)[name = string("op_2324_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2333_to_fp16 = const()[name = string("op_2333_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170541632)))];
+            tensor<fp16, [5120]> var_2334_to_fp16 = const()[name = string("op_2334_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(183648896)))];
+            tensor<fp16, [1, 1500, 5120]> linear_124_cast_fp16 = linear(bias = var_2334_to_fp16, weight = var_2333_to_fp16, x = var_2324_cast_fp16)[name = string("linear_124_cast_fp16")];
+            string x_257_mode_0 = const()[name = string("x_257_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_257_cast_fp16 = gelu(mode = x_257_mode_0, x = linear_124_cast_fp16)[name = string("x_257_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2339_to_fp16 = const()[name = string("op_2339_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(183659200)))];
+            tensor<fp16, [1280]> var_2340_to_fp16 = const()[name = string("op_2340_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196766464)))];
+            tensor<fp16, [1, 1500, 1280]> linear_125_cast_fp16 = linear(bias = var_2340_to_fp16, weight = var_2339_to_fp16, x = x_257_cast_fp16)[name = string("linear_125_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_259_cast_fp16 = add(x = x_253_cast_fp16, y = linear_125_cast_fp16)[name = string("x_259_cast_fp16")];
+            int32 var_2350 = const()[name = string("op_2350"), val = int32(-1)];
+            tensor<int32, [1]> var_2366_axes_0 = const()[name = string("op_2366_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196769088)))];
+            tensor<fp16, [1280]> blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196771712)))];
+            fp16 var_2356_to_fp16 = const()[name = string("op_2356_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2366_cast_fp16 = layer_norm(axes = var_2366_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_2356_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_259_cast_fp16)[name = string("op_2366_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2377_to_fp16 = const()[name = string("op_2377_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196774336)))];
+            tensor<fp16, [1280]> var_2378_to_fp16 = const()[name = string("op_2378_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(200051200)))];
+            tensor<fp16, [1, 1500, 1280]> linear_126_cast_fp16 = linear(bias = var_2378_to_fp16, weight = var_2377_to_fp16, x = var_2366_cast_fp16)[name = string("linear_126_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2381_to_fp16 = const()[name = string("op_2381_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(200053824)))];
+            tensor<fp16, [1, 1500, 1280]> linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2381_to_fp16, x = var_2366_cast_fp16)[name = string("linear_127_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2385_to_fp16 = const()[name = string("op_2385_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(203330688)))];
+            tensor<fp16, [1280]> var_2386_to_fp16 = const()[name = string("op_2386_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(206607552)))];
+            tensor<fp16, [1, 1500, 1280]> linear_128_cast_fp16 = linear(bias = var_2386_to_fp16, weight = var_2385_to_fp16, x = var_2366_cast_fp16)[name = string("linear_128_cast_fp16")];
+            tensor<int32, [4]> var_2394 = const()[name = string("op_2394"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2395_cast_fp16 = reshape(shape = var_2394, x = linear_126_cast_fp16)[name = string("op_2395_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_266_to_fp16 = const()[name = string("const_266_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_87_cast_fp16 = mul(x = var_2395_cast_fp16, y = const_266_to_fp16)[name = string("q_87_cast_fp16")];
+            tensor<int32, [4]> var_2401 = const()[name = string("op_2401"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2402_cast_fp16 = reshape(shape = var_2401, x = linear_127_cast_fp16)[name = string("op_2402_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_87_cast_fp16 = mul(x = var_2402_cast_fp16, y = const_267_to_fp16)[name = string("k_87_cast_fp16")];
+            tensor<int32, [4]> var_2408 = const()[name = string("op_2408"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2409_cast_fp16 = reshape(shape = var_2408, x = linear_128_cast_fp16)[name = string("op_2409_cast_fp16")];
+            tensor<int32, [4]> var_2410 = const()[name = string("op_2410"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)];
+            bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_75 = transpose(perm = transpose_75_perm_0, x = k_87_cast_fp16)[name = string("transpose_138")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = q_87_cast_fp16)[name = string("transpose_139")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_74, y = transpose_75)[name = string("qk_43_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2414_cast_fp16 = softmax(axis = var_2350, x = qk_43_cast_fp16)[name = string("op_2414_cast_fp16")];
+            bool var_2416_transpose_x_0 = const()[name = string("op_2416_transpose_x_0"), val = bool(false)];
+            bool var_2416_transpose_y_0 = const()[name = string("op_2416_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_87_cast_fp16 = transpose(perm = var_2410, x = var_2409_cast_fp16)[name = string("transpose_137")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2416_cast_fp16 = matmul(transpose_x = var_2416_transpose_x_0, transpose_y = var_2416_transpose_y_0, x = var_2414_cast_fp16, y = v_87_cast_fp16)[name = string("op_2416_cast_fp16")];
+            tensor<int32, [4]> var_2417 = const()[name = string("op_2417"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2418_cast_fp16 = transpose(perm = var_2417, x = var_2416_cast_fp16)[name = string("transpose_136")];
+            tensor<fp16, [1, 1500, 1280]> x_263_cast_fp16 = reshape(shape = concat_21, x = var_2418_cast_fp16)[name = string("x_263_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2422_to_fp16 = const()[name = string("op_2422_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(206610176)))];
+            tensor<fp16, [1280]> var_2423_to_fp16 = const()[name = string("op_2423_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209887040)))];
+            tensor<fp16, [1, 1500, 1280]> linear_129_cast_fp16 = linear(bias = var_2423_to_fp16, weight = var_2422_to_fp16, x = x_263_cast_fp16)[name = string("linear_129_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_265_cast_fp16 = add(x = x_259_cast_fp16, y = linear_129_cast_fp16)[name = string("x_265_cast_fp16")];
+            tensor<int32, [1]> var_2430_axes_0 = const()[name = string("op_2430_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209889664)))];
+            tensor<fp16, [1280]> blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209892288)))];
+            tensor<fp16, [1, 1500, 1280]> var_2430_cast_fp16 = layer_norm(axes = var_2430_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_2356_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_265_cast_fp16)[name = string("op_2430_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2439_to_fp16 = const()[name = string("op_2439_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209894912)))];
+            tensor<fp16, [5120]> var_2440_to_fp16 = const()[name = string("op_2440_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(223002176)))];
+            tensor<fp16, [1, 1500, 5120]> linear_130_cast_fp16 = linear(bias = var_2440_to_fp16, weight = var_2439_to_fp16, x = var_2430_cast_fp16)[name = string("linear_130_cast_fp16")];
+            string x_269_mode_0 = const()[name = string("x_269_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_269_cast_fp16 = gelu(mode = x_269_mode_0, x = linear_130_cast_fp16)[name = string("x_269_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2445_to_fp16 = const()[name = string("op_2445_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(223012480)))];
+            tensor<fp16, [1280]> var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236119744)))];
+            tensor<fp16, [1, 1500, 1280]> linear_131_cast_fp16 = linear(bias = var_2446_to_fp16, weight = var_2445_to_fp16, x = x_269_cast_fp16)[name = string("linear_131_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_271_cast_fp16 = add(x = x_265_cast_fp16, y = linear_131_cast_fp16)[name = string("x_271_cast_fp16")];
+            int32 var_2456 = const()[name = string("op_2456"), val = int32(-1)];
+            tensor<int32, [1]> var_2472_axes_0 = const()[name = string("op_2472_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236122368)))];
+            tensor<fp16, [1280]> blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236124992)))];
+            fp16 var_2462_to_fp16 = const()[name = string("op_2462_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2472_cast_fp16 = layer_norm(axes = var_2472_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_2462_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_271_cast_fp16)[name = string("op_2472_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2483_to_fp16 = const()[name = string("op_2483_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236127616)))];
+            tensor<fp16, [1280]> var_2484_to_fp16 = const()[name = string("op_2484_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(239404480)))];
+            tensor<fp16, [1, 1500, 1280]> linear_132_cast_fp16 = linear(bias = var_2484_to_fp16, weight = var_2483_to_fp16, x = var_2472_cast_fp16)[name = string("linear_132_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2487_to_fp16 = const()[name = string("op_2487_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(239407104)))];
+            tensor<fp16, [1, 1500, 1280]> linear_133_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2487_to_fp16, x = var_2472_cast_fp16)[name = string("linear_133_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2491_to_fp16 = const()[name = string("op_2491_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(242683968)))];
+            tensor<fp16, [1280]> var_2492_to_fp16 = const()[name = string("op_2492_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(245960832)))];
+            tensor<fp16, [1, 1500, 1280]> linear_134_cast_fp16 = linear(bias = var_2492_to_fp16, weight = var_2491_to_fp16, x = var_2472_cast_fp16)[name = string("linear_134_cast_fp16")];
+            tensor<int32, [4]> var_2500 = const()[name = string("op_2500"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2501_cast_fp16 = reshape(shape = var_2500, x = linear_132_cast_fp16)[name = string("op_2501_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_91_cast_fp16 = mul(x = var_2501_cast_fp16, y = const_268_to_fp16)[name = string("q_91_cast_fp16")];
+            tensor<int32, [4]> var_2507 = const()[name = string("op_2507"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2508_cast_fp16 = reshape(shape = var_2507, x = linear_133_cast_fp16)[name = string("op_2508_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_91_cast_fp16 = mul(x = var_2508_cast_fp16, y = const_269_to_fp16)[name = string("k_91_cast_fp16")];
+            tensor<int32, [4]> var_2514 = const()[name = string("op_2514"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2515_cast_fp16 = reshape(shape = var_2514, x = linear_134_cast_fp16)[name = string("op_2515_cast_fp16")];
+            tensor<int32, [4]> var_2516 = const()[name = string("op_2516"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_45_transpose_x_0 = const()[name = string("qk_45_transpose_x_0"), val = bool(false)];
+            bool qk_45_transpose_y_0 = const()[name = string("qk_45_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_77 = transpose(perm = transpose_77_perm_0, x = k_91_cast_fp16)[name = string("transpose_134")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = q_91_cast_fp16)[name = string("transpose_135")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_45_cast_fp16 = matmul(transpose_x = qk_45_transpose_x_0, transpose_y = qk_45_transpose_y_0, x = transpose_76, y = transpose_77)[name = string("qk_45_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2520_cast_fp16 = softmax(axis = var_2456, x = qk_45_cast_fp16)[name = string("op_2520_cast_fp16")];
+            bool var_2522_transpose_x_0 = const()[name = string("op_2522_transpose_x_0"), val = bool(false)];
+            bool var_2522_transpose_y_0 = const()[name = string("op_2522_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_91_cast_fp16 = transpose(perm = var_2516, x = var_2515_cast_fp16)[name = string("transpose_133")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2522_cast_fp16 = matmul(transpose_x = var_2522_transpose_x_0, transpose_y = var_2522_transpose_y_0, x = var_2520_cast_fp16, y = v_91_cast_fp16)[name = string("op_2522_cast_fp16")];
+            tensor<int32, [4]> var_2523 = const()[name = string("op_2523"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_22 = const()[name = string("concat_22"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2524_cast_fp16 = transpose(perm = var_2523, x = var_2522_cast_fp16)[name = string("transpose_132")];
+            tensor<fp16, [1, 1500, 1280]> x_275_cast_fp16 = reshape(shape = concat_22, x = var_2524_cast_fp16)[name = string("x_275_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2528_to_fp16 = const()[name = string("op_2528_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(245963456)))];
+            tensor<fp16, [1280]> var_2529_to_fp16 = const()[name = string("op_2529_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249240320)))];
+            tensor<fp16, [1, 1500, 1280]> linear_135_cast_fp16 = linear(bias = var_2529_to_fp16, weight = var_2528_to_fp16, x = x_275_cast_fp16)[name = string("linear_135_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_277_cast_fp16 = add(x = x_271_cast_fp16, y = linear_135_cast_fp16)[name = string("x_277_cast_fp16")];
+            tensor<int32, [1]> var_2536_axes_0 = const()[name = string("op_2536_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249242944)))];
+            tensor<fp16, [1280]> blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249245568)))];
+            tensor<fp16, [1, 1500, 1280]> var_2536_cast_fp16 = layer_norm(axes = var_2536_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_2462_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_277_cast_fp16)[name = string("op_2536_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2545_to_fp16 = const()[name = string("op_2545_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249248192)))];
+            tensor<fp16, [5120]> var_2546_to_fp16 = const()[name = string("op_2546_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(262355456)))];
+            tensor<fp16, [1, 1500, 5120]> linear_136_cast_fp16 = linear(bias = var_2546_to_fp16, weight = var_2545_to_fp16, x = var_2536_cast_fp16)[name = string("linear_136_cast_fp16")];
+            string x_281_mode_0 = const()[name = string("x_281_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_281_cast_fp16 = gelu(mode = x_281_mode_0, x = linear_136_cast_fp16)[name = string("x_281_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2551_to_fp16 = const()[name = string("op_2551_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(262365760)))];
+            tensor<fp16, [1280]> var_2552_to_fp16 = const()[name = string("op_2552_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275473024)))];
+            tensor<fp16, [1, 1500, 1280]> linear_137_cast_fp16 = linear(bias = var_2552_to_fp16, weight = var_2551_to_fp16, x = x_281_cast_fp16)[name = string("linear_137_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_283_cast_fp16 = add(x = x_277_cast_fp16, y = linear_137_cast_fp16)[name = string("x_283_cast_fp16")];
+            int32 var_2562 = const()[name = string("op_2562"), val = int32(-1)];
+            tensor<int32, [1]> var_2578_axes_0 = const()[name = string("op_2578_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275475648)))];
+            tensor<fp16, [1280]> blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275478272)))];
+            fp16 var_2568_to_fp16 = const()[name = string("op_2568_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2578_cast_fp16 = layer_norm(axes = var_2578_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_2568_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_283_cast_fp16)[name = string("op_2578_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2589_to_fp16 = const()[name = string("op_2589_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275480896)))];
+            tensor<fp16, [1280]> var_2590_to_fp16 = const()[name = string("op_2590_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(278757760)))];
+            tensor<fp16, [1, 1500, 1280]> linear_138_cast_fp16 = linear(bias = var_2590_to_fp16, weight = var_2589_to_fp16, x = var_2578_cast_fp16)[name = string("linear_138_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2593_to_fp16 = const()[name = string("op_2593_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(278760384)))];
+            tensor<fp16, [1, 1500, 1280]> linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2593_to_fp16, x = var_2578_cast_fp16)[name = string("linear_139_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2597_to_fp16 = const()[name = string("op_2597_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(282037248)))];
+            tensor<fp16, [1280]> var_2598_to_fp16 = const()[name = string("op_2598_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(285314112)))];
+            tensor<fp16, [1, 1500, 1280]> linear_140_cast_fp16 = linear(bias = var_2598_to_fp16, weight = var_2597_to_fp16, x = var_2578_cast_fp16)[name = string("linear_140_cast_fp16")];
+            tensor<int32, [4]> var_2606 = const()[name = string("op_2606"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2607_cast_fp16 = reshape(shape = var_2606, x = linear_138_cast_fp16)[name = string("op_2607_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_95_cast_fp16 = mul(x = var_2607_cast_fp16, y = const_270_to_fp16)[name = string("q_95_cast_fp16")];
+            tensor<int32, [4]> var_2613 = const()[name = string("op_2613"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2614_cast_fp16 = reshape(shape = var_2613, x = linear_139_cast_fp16)[name = string("op_2614_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_95_cast_fp16 = mul(x = var_2614_cast_fp16, y = const_271_to_fp16)[name = string("k_95_cast_fp16")];
+            tensor<int32, [4]> var_2620 = const()[name = string("op_2620"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2621_cast_fp16 = reshape(shape = var_2620, x = linear_140_cast_fp16)[name = string("op_2621_cast_fp16")];
+            tensor<int32, [4]> var_2622 = const()[name = string("op_2622"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)];
+            bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_79 = transpose(perm = transpose_79_perm_0, x = k_95_cast_fp16)[name = string("transpose_130")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = q_95_cast_fp16)[name = string("transpose_131")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_78, y = transpose_79)[name = string("qk_47_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2626_cast_fp16 = softmax(axis = var_2562, x = qk_47_cast_fp16)[name = string("op_2626_cast_fp16")];
+            bool var_2628_transpose_x_0 = const()[name = string("op_2628_transpose_x_0"), val = bool(false)];
+            bool var_2628_transpose_y_0 = const()[name = string("op_2628_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_95_cast_fp16 = transpose(perm = var_2622, x = var_2621_cast_fp16)[name = string("transpose_129")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2628_cast_fp16 = matmul(transpose_x = var_2628_transpose_x_0, transpose_y = var_2628_transpose_y_0, x = var_2626_cast_fp16, y = v_95_cast_fp16)[name = string("op_2628_cast_fp16")];
+            tensor<int32, [4]> var_2629 = const()[name = string("op_2629"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2630_cast_fp16 = transpose(perm = var_2629, x = var_2628_cast_fp16)[name = string("transpose_128")];
+            tensor<fp16, [1, 1500, 1280]> x_287_cast_fp16 = reshape(shape = concat_23, x = var_2630_cast_fp16)[name = string("x_287_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2634_to_fp16 = const()[name = string("op_2634_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(285316736)))];
+            tensor<fp16, [1280]> var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288593600)))];
+            tensor<fp16, [1, 1500, 1280]> linear_141_cast_fp16 = linear(bias = var_2635_to_fp16, weight = var_2634_to_fp16, x = x_287_cast_fp16)[name = string("linear_141_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_289_cast_fp16 = add(x = x_283_cast_fp16, y = linear_141_cast_fp16)[name = string("x_289_cast_fp16")];
+            tensor<int32, [1]> var_2642_axes_0 = const()[name = string("op_2642_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288596224)))];
+            tensor<fp16, [1280]> blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288598848)))];
+            tensor<fp16, [1, 1500, 1280]> var_2642_cast_fp16 = layer_norm(axes = var_2642_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_2568_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_289_cast_fp16)[name = string("op_2642_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2651_to_fp16 = const()[name = string("op_2651_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288601472)))];
+            tensor<fp16, [5120]> var_2652_to_fp16 = const()[name = string("op_2652_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(301708736)))];
+            tensor<fp16, [1, 1500, 5120]> linear_142_cast_fp16 = linear(bias = var_2652_to_fp16, weight = var_2651_to_fp16, x = var_2642_cast_fp16)[name = string("linear_142_cast_fp16")];
+            string x_293_mode_0 = const()[name = string("x_293_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_293_cast_fp16 = gelu(mode = x_293_mode_0, x = linear_142_cast_fp16)[name = string("x_293_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(301719040)))];
+            tensor<fp16, [1280]> var_2658_to_fp16 = const()[name = string("op_2658_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314826304)))];
+            tensor<fp16, [1, 1500, 1280]> linear_143_cast_fp16 = linear(bias = var_2658_to_fp16, weight = var_2657_to_fp16, x = x_293_cast_fp16)[name = string("linear_143_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_295_cast_fp16 = add(x = x_289_cast_fp16, y = linear_143_cast_fp16)[name = string("x_295_cast_fp16")];
+            int32 var_2668 = const()[name = string("op_2668"), val = int32(-1)];
+            tensor<int32, [1]> var_2684_axes_0 = const()[name = string("op_2684_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314828928)))];
+            tensor<fp16, [1280]> blocks_24_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314831552)))];
+            fp16 var_2674_to_fp16 = const()[name = string("op_2674_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2684_cast_fp16 = layer_norm(axes = var_2684_axes_0, beta = blocks_24_attn_ln_bias_to_fp16, epsilon = var_2674_to_fp16, gamma = blocks_24_attn_ln_weight_to_fp16, x = x_295_cast_fp16)[name = string("op_2684_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2695_to_fp16 = const()[name = string("op_2695_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314834176)))];
+            tensor<fp16, [1280]> var_2696_to_fp16 = const()[name = string("op_2696_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(318111040)))];
+            tensor<fp16, [1, 1500, 1280]> linear_144_cast_fp16 = linear(bias = var_2696_to_fp16, weight = var_2695_to_fp16, x = var_2684_cast_fp16)[name = string("linear_144_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2699_to_fp16 = const()[name = string("op_2699_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(318113664)))];
+            tensor<fp16, [1, 1500, 1280]> linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2699_to_fp16, x = var_2684_cast_fp16)[name = string("linear_145_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2703_to_fp16 = const()[name = string("op_2703_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(321390528)))];
+            tensor<fp16, [1280]> var_2704_to_fp16 = const()[name = string("op_2704_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(324667392)))];
+            tensor<fp16, [1, 1500, 1280]> linear_146_cast_fp16 = linear(bias = var_2704_to_fp16, weight = var_2703_to_fp16, x = var_2684_cast_fp16)[name = string("linear_146_cast_fp16")];
+            tensor<int32, [4]> var_2712 = const()[name = string("op_2712"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2713_cast_fp16 = reshape(shape = var_2712, x = linear_144_cast_fp16)[name = string("op_2713_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_99_cast_fp16 = mul(x = var_2713_cast_fp16, y = const_272_to_fp16)[name = string("q_99_cast_fp16")];
+            tensor<int32, [4]> var_2719 = const()[name = string("op_2719"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2720_cast_fp16 = reshape(shape = var_2719, x = linear_145_cast_fp16)[name = string("op_2720_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_99_cast_fp16 = mul(x = var_2720_cast_fp16, y = const_273_to_fp16)[name = string("k_99_cast_fp16")];
+            tensor<int32, [4]> var_2726 = const()[name = string("op_2726"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2727_cast_fp16 = reshape(shape = var_2726, x = linear_146_cast_fp16)[name = string("op_2727_cast_fp16")];
+            tensor<int32, [4]> var_2728 = const()[name = string("op_2728"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)];
+            bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_81 = transpose(perm = transpose_81_perm_0, x = k_99_cast_fp16)[name = string("transpose_126")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = q_99_cast_fp16)[name = string("transpose_127")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_80, y = transpose_81)[name = string("qk_49_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2732_cast_fp16 = softmax(axis = var_2668, x = qk_49_cast_fp16)[name = string("op_2732_cast_fp16")];
+            bool var_2734_transpose_x_0 = const()[name = string("op_2734_transpose_x_0"), val = bool(false)];
+            bool var_2734_transpose_y_0 = const()[name = string("op_2734_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_99_cast_fp16 = transpose(perm = var_2728, x = var_2727_cast_fp16)[name = string("transpose_125")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2734_cast_fp16 = matmul(transpose_x = var_2734_transpose_x_0, transpose_y = var_2734_transpose_y_0, x = var_2732_cast_fp16, y = v_99_cast_fp16)[name = string("op_2734_cast_fp16")];
+            tensor<int32, [4]> var_2735 = const()[name = string("op_2735"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_24 = const()[name = string("concat_24"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2736_cast_fp16 = transpose(perm = var_2735, x = var_2734_cast_fp16)[name = string("transpose_124")];
+            tensor<fp16, [1, 1500, 1280]> x_299_cast_fp16 = reshape(shape = concat_24, x = var_2736_cast_fp16)[name = string("x_299_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2740_to_fp16 = const()[name = string("op_2740_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(324670016)))];
+            tensor<fp16, [1280]> var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327946880)))];
+            tensor<fp16, [1, 1500, 1280]> linear_147_cast_fp16 = linear(bias = var_2741_to_fp16, weight = var_2740_to_fp16, x = x_299_cast_fp16)[name = string("linear_147_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_301_cast_fp16 = add(x = x_295_cast_fp16, y = linear_147_cast_fp16)[name = string("x_301_cast_fp16")];
+            tensor<int32, [1]> var_2748_axes_0 = const()[name = string("op_2748_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_24_mlp_ln_weight_to_fp16 = const()[name = string("blocks_24_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327949504)))];
+            tensor<fp16, [1280]> blocks_24_mlp_ln_bias_to_fp16 = const()[name = string("blocks_24_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327952128)))];
+            tensor<fp16, [1, 1500, 1280]> var_2748_cast_fp16 = layer_norm(axes = var_2748_axes_0, beta = blocks_24_mlp_ln_bias_to_fp16, epsilon = var_2674_to_fp16, gamma = blocks_24_mlp_ln_weight_to_fp16, x = x_301_cast_fp16)[name = string("op_2748_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2757_to_fp16 = const()[name = string("op_2757_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327954752)))];
+            tensor<fp16, [5120]> var_2758_to_fp16 = const()[name = string("op_2758_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(341062016)))];
+            tensor<fp16, [1, 1500, 5120]> linear_148_cast_fp16 = linear(bias = var_2758_to_fp16, weight = var_2757_to_fp16, x = var_2748_cast_fp16)[name = string("linear_148_cast_fp16")];
+            string x_305_mode_0 = const()[name = string("x_305_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_305_cast_fp16 = gelu(mode = x_305_mode_0, x = linear_148_cast_fp16)[name = string("x_305_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2763_to_fp16 = const()[name = string("op_2763_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(341072320)))];
+            tensor<fp16, [1280]> var_2764_to_fp16 = const()[name = string("op_2764_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354179584)))];
+            tensor<fp16, [1, 1500, 1280]> linear_149_cast_fp16 = linear(bias = var_2764_to_fp16, weight = var_2763_to_fp16, x = x_305_cast_fp16)[name = string("linear_149_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_307_cast_fp16 = add(x = x_301_cast_fp16, y = linear_149_cast_fp16)[name = string("x_307_cast_fp16")];
+            int32 var_2774 = const()[name = string("op_2774"), val = int32(-1)];
+            tensor<int32, [1]> var_2790_axes_0 = const()[name = string("op_2790_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354182208)))];
+            tensor<fp16, [1280]> blocks_25_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354184832)))];
+            fp16 var_2780_to_fp16 = const()[name = string("op_2780_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2790_cast_fp16 = layer_norm(axes = var_2790_axes_0, beta = blocks_25_attn_ln_bias_to_fp16, epsilon = var_2780_to_fp16, gamma = blocks_25_attn_ln_weight_to_fp16, x = x_307_cast_fp16)[name = string("op_2790_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2801_to_fp16 = const()[name = string("op_2801_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354187456)))];
+            tensor<fp16, [1280]> var_2802_to_fp16 = const()[name = string("op_2802_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(357464320)))];
+            tensor<fp16, [1, 1500, 1280]> linear_150_cast_fp16 = linear(bias = var_2802_to_fp16, weight = var_2801_to_fp16, x = var_2790_cast_fp16)[name = string("linear_150_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2805_to_fp16 = const()[name = string("op_2805_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(357466944)))];
+            tensor<fp16, [1, 1500, 1280]> linear_151_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2805_to_fp16, x = var_2790_cast_fp16)[name = string("linear_151_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(360743808)))];
+            tensor<fp16, [1280]> var_2810_to_fp16 = const()[name = string("op_2810_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(364020672)))];
+            tensor<fp16, [1, 1500, 1280]> linear_152_cast_fp16 = linear(bias = var_2810_to_fp16, weight = var_2809_to_fp16, x = var_2790_cast_fp16)[name = string("linear_152_cast_fp16")];
+            tensor<int32, [4]> var_2818 = const()[name = string("op_2818"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2819_cast_fp16 = reshape(shape = var_2818, x = linear_150_cast_fp16)[name = string("op_2819_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_103_cast_fp16 = mul(x = var_2819_cast_fp16, y = const_274_to_fp16)[name = string("q_103_cast_fp16")];
+            tensor<int32, [4]> var_2825 = const()[name = string("op_2825"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2826_cast_fp16 = reshape(shape = var_2825, x = linear_151_cast_fp16)[name = string("op_2826_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_275_to_fp16 = const()[name = string("const_275_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_103_cast_fp16 = mul(x = var_2826_cast_fp16, y = const_275_to_fp16)[name = string("k_103_cast_fp16")];
+            tensor<int32, [4]> var_2832 = const()[name = string("op_2832"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2833_cast_fp16 = reshape(shape = var_2832, x = linear_152_cast_fp16)[name = string("op_2833_cast_fp16")];
+            tensor<int32, [4]> var_2834 = const()[name = string("op_2834"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_51_transpose_x_0 = const()[name = string("qk_51_transpose_x_0"), val = bool(false)];
+            bool qk_51_transpose_y_0 = const()[name = string("qk_51_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_83 = transpose(perm = transpose_83_perm_0, x = k_103_cast_fp16)[name = string("transpose_122")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = q_103_cast_fp16)[name = string("transpose_123")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_51_cast_fp16 = matmul(transpose_x = qk_51_transpose_x_0, transpose_y = qk_51_transpose_y_0, x = transpose_82, y = transpose_83)[name = string("qk_51_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2838_cast_fp16 = softmax(axis = var_2774, x = qk_51_cast_fp16)[name = string("op_2838_cast_fp16")];
+            bool var_2840_transpose_x_0 = const()[name = string("op_2840_transpose_x_0"), val = bool(false)];
+            bool var_2840_transpose_y_0 = const()[name = string("op_2840_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_103_cast_fp16 = transpose(perm = var_2834, x = var_2833_cast_fp16)[name = string("transpose_121")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2840_cast_fp16 = matmul(transpose_x = var_2840_transpose_x_0, transpose_y = var_2840_transpose_y_0, x = var_2838_cast_fp16, y = v_103_cast_fp16)[name = string("op_2840_cast_fp16")];
+            tensor<int32, [4]> var_2841 = const()[name = string("op_2841"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_25 = const()[name = string("concat_25"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2842_cast_fp16 = transpose(perm = var_2841, x = var_2840_cast_fp16)[name = string("transpose_120")];
+            tensor<fp16, [1, 1500, 1280]> x_311_cast_fp16 = reshape(shape = concat_25, x = var_2842_cast_fp16)[name = string("x_311_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2846_to_fp16 = const()[name = string("op_2846_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(364023296)))];
+            tensor<fp16, [1280]> var_2847_to_fp16 = const()[name = string("op_2847_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367300160)))];
+            tensor<fp16, [1, 1500, 1280]> linear_153_cast_fp16 = linear(bias = var_2847_to_fp16, weight = var_2846_to_fp16, x = x_311_cast_fp16)[name = string("linear_153_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_313_cast_fp16 = add(x = x_307_cast_fp16, y = linear_153_cast_fp16)[name = string("x_313_cast_fp16")];
+            tensor<int32, [1]> var_2854_axes_0 = const()[name = string("op_2854_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_25_mlp_ln_weight_to_fp16 = const()[name = string("blocks_25_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367302784)))];
+            tensor<fp16, [1280]> blocks_25_mlp_ln_bias_to_fp16 = const()[name = string("blocks_25_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367305408)))];
+            tensor<fp16, [1, 1500, 1280]> var_2854_cast_fp16 = layer_norm(axes = var_2854_axes_0, beta = blocks_25_mlp_ln_bias_to_fp16, epsilon = var_2780_to_fp16, gamma = blocks_25_mlp_ln_weight_to_fp16, x = x_313_cast_fp16)[name = string("op_2854_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2863_to_fp16 = const()[name = string("op_2863_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367308032)))];
+            tensor<fp16, [5120]> var_2864_to_fp16 = const()[name = string("op_2864_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(380415296)))];
+            tensor<fp16, [1, 1500, 5120]> linear_154_cast_fp16 = linear(bias = var_2864_to_fp16, weight = var_2863_to_fp16, x = var_2854_cast_fp16)[name = string("linear_154_cast_fp16")];
+            string x_317_mode_0 = const()[name = string("x_317_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_317_cast_fp16 = gelu(mode = x_317_mode_0, x = linear_154_cast_fp16)[name = string("x_317_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2869_to_fp16 = const()[name = string("op_2869_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(380425600)))];
+            tensor<fp16, [1280]> var_2870_to_fp16 = const()[name = string("op_2870_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393532864)))];
+            tensor<fp16, [1, 1500, 1280]> linear_155_cast_fp16 = linear(bias = var_2870_to_fp16, weight = var_2869_to_fp16, x = x_317_cast_fp16)[name = string("linear_155_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_319_cast_fp16 = add(x = x_313_cast_fp16, y = linear_155_cast_fp16)[name = string("x_319_cast_fp16")];
+            int32 var_2880 = const()[name = string("op_2880"), val = int32(-1)];
+            tensor<int32, [1]> var_2896_axes_0 = const()[name = string("op_2896_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393535488)))];
+            tensor<fp16, [1280]> blocks_26_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393538112)))];
+            fp16 var_2886_to_fp16 = const()[name = string("op_2886_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_2896_cast_fp16 = layer_norm(axes = var_2896_axes_0, beta = blocks_26_attn_ln_bias_to_fp16, epsilon = var_2886_to_fp16, gamma = blocks_26_attn_ln_weight_to_fp16, x = x_319_cast_fp16)[name = string("op_2896_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2907_to_fp16 = const()[name = string("op_2907_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393540736)))];
+            tensor<fp16, [1280]> var_2908_to_fp16 = const()[name = string("op_2908_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(396817600)))];
+            tensor<fp16, [1, 1500, 1280]> linear_156_cast_fp16 = linear(bias = var_2908_to_fp16, weight = var_2907_to_fp16, x = var_2896_cast_fp16)[name = string("linear_156_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2911_to_fp16 = const()[name = string("op_2911_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(396820224)))];
+            tensor<fp16, [1, 1500, 1280]> linear_157_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2911_to_fp16, x = var_2896_cast_fp16)[name = string("linear_157_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2915_to_fp16 = const()[name = string("op_2915_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(400097088)))];
+            tensor<fp16, [1280]> var_2916_to_fp16 = const()[name = string("op_2916_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(403373952)))];
+            tensor<fp16, [1, 1500, 1280]> linear_158_cast_fp16 = linear(bias = var_2916_to_fp16, weight = var_2915_to_fp16, x = var_2896_cast_fp16)[name = string("linear_158_cast_fp16")];
+            tensor<int32, [4]> var_2924 = const()[name = string("op_2924"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2925_cast_fp16 = reshape(shape = var_2924, x = linear_156_cast_fp16)[name = string("op_2925_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_107_cast_fp16 = mul(x = var_2925_cast_fp16, y = const_276_to_fp16)[name = string("q_107_cast_fp16")];
+            tensor<int32, [4]> var_2931 = const()[name = string("op_2931"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2932_cast_fp16 = reshape(shape = var_2931, x = linear_157_cast_fp16)[name = string("op_2932_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_107_cast_fp16 = mul(x = var_2932_cast_fp16, y = const_277_to_fp16)[name = string("k_107_cast_fp16")];
+            tensor<int32, [4]> var_2938 = const()[name = string("op_2938"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2939_cast_fp16 = reshape(shape = var_2938, x = linear_158_cast_fp16)[name = string("op_2939_cast_fp16")];
+            tensor<int32, [4]> var_2940 = const()[name = string("op_2940"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)];
+            bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_85 = transpose(perm = transpose_85_perm_0, x = k_107_cast_fp16)[name = string("transpose_118")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = q_107_cast_fp16)[name = string("transpose_119")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_84, y = transpose_85)[name = string("qk_53_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2944_cast_fp16 = softmax(axis = var_2880, x = qk_53_cast_fp16)[name = string("op_2944_cast_fp16")];
+            bool var_2946_transpose_x_0 = const()[name = string("op_2946_transpose_x_0"), val = bool(false)];
+            bool var_2946_transpose_y_0 = const()[name = string("op_2946_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_107_cast_fp16 = transpose(perm = var_2940, x = var_2939_cast_fp16)[name = string("transpose_117")];
+            tensor<fp16, [1, 20, 1500, 64]> var_2946_cast_fp16 = matmul(transpose_x = var_2946_transpose_x_0, transpose_y = var_2946_transpose_y_0, x = var_2944_cast_fp16, y = v_107_cast_fp16)[name = string("op_2946_cast_fp16")];
+            tensor<int32, [4]> var_2947 = const()[name = string("op_2947"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_26 = const()[name = string("concat_26"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_2948_cast_fp16 = transpose(perm = var_2947, x = var_2946_cast_fp16)[name = string("transpose_116")];
+            tensor<fp16, [1, 1500, 1280]> x_323_cast_fp16 = reshape(shape = concat_26, x = var_2948_cast_fp16)[name = string("x_323_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(403376576)))];
+            tensor<fp16, [1280]> var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406653440)))];
+            tensor<fp16, [1, 1500, 1280]> linear_159_cast_fp16 = linear(bias = var_2953_to_fp16, weight = var_2952_to_fp16, x = x_323_cast_fp16)[name = string("linear_159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_325_cast_fp16 = add(x = x_319_cast_fp16, y = linear_159_cast_fp16)[name = string("x_325_cast_fp16")];
+            tensor<int32, [1]> var_2960_axes_0 = const()[name = string("op_2960_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_26_mlp_ln_weight_to_fp16 = const()[name = string("blocks_26_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406656064)))];
+            tensor<fp16, [1280]> blocks_26_mlp_ln_bias_to_fp16 = const()[name = string("blocks_26_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406658688)))];
+            tensor<fp16, [1, 1500, 1280]> var_2960_cast_fp16 = layer_norm(axes = var_2960_axes_0, beta = blocks_26_mlp_ln_bias_to_fp16, epsilon = var_2886_to_fp16, gamma = blocks_26_mlp_ln_weight_to_fp16, x = x_325_cast_fp16)[name = string("op_2960_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_2969_to_fp16 = const()[name = string("op_2969_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406661312)))];
+            tensor<fp16, [5120]> var_2970_to_fp16 = const()[name = string("op_2970_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(419768576)))];
+            tensor<fp16, [1, 1500, 5120]> linear_160_cast_fp16 = linear(bias = var_2970_to_fp16, weight = var_2969_to_fp16, x = var_2960_cast_fp16)[name = string("linear_160_cast_fp16")];
+            string x_329_mode_0 = const()[name = string("x_329_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_329_cast_fp16 = gelu(mode = x_329_mode_0, x = linear_160_cast_fp16)[name = string("x_329_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_2975_to_fp16 = const()[name = string("op_2975_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(419778880)))];
+            tensor<fp16, [1280]> var_2976_to_fp16 = const()[name = string("op_2976_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432886144)))];
+            tensor<fp16, [1, 1500, 1280]> linear_161_cast_fp16 = linear(bias = var_2976_to_fp16, weight = var_2975_to_fp16, x = x_329_cast_fp16)[name = string("linear_161_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_331_cast_fp16 = add(x = x_325_cast_fp16, y = linear_161_cast_fp16)[name = string("x_331_cast_fp16")];
+            int32 var_2986 = const()[name = string("op_2986"), val = int32(-1)];
+            tensor<int32, [1]> var_3002_axes_0 = const()[name = string("op_3002_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432888768)))];
+            tensor<fp16, [1280]> blocks_27_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432891392)))];
+            fp16 var_2992_to_fp16 = const()[name = string("op_2992_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3002_cast_fp16 = layer_norm(axes = var_3002_axes_0, beta = blocks_27_attn_ln_bias_to_fp16, epsilon = var_2992_to_fp16, gamma = blocks_27_attn_ln_weight_to_fp16, x = x_331_cast_fp16)[name = string("op_3002_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3013_to_fp16 = const()[name = string("op_3013_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432894016)))];
+            tensor<fp16, [1280]> var_3014_to_fp16 = const()[name = string("op_3014_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(436170880)))];
+            tensor<fp16, [1, 1500, 1280]> linear_162_cast_fp16 = linear(bias = var_3014_to_fp16, weight = var_3013_to_fp16, x = var_3002_cast_fp16)[name = string("linear_162_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3017_to_fp16 = const()[name = string("op_3017_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(436173504)))];
+            tensor<fp16, [1, 1500, 1280]> linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3017_to_fp16, x = var_3002_cast_fp16)[name = string("linear_163_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3021_to_fp16 = const()[name = string("op_3021_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(439450368)))];
+            tensor<fp16, [1280]> var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(442727232)))];
+            tensor<fp16, [1, 1500, 1280]> linear_164_cast_fp16 = linear(bias = var_3022_to_fp16, weight = var_3021_to_fp16, x = var_3002_cast_fp16)[name = string("linear_164_cast_fp16")];
+            tensor<int32, [4]> var_3030 = const()[name = string("op_3030"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3031_cast_fp16 = reshape(shape = var_3030, x = linear_162_cast_fp16)[name = string("op_3031_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_278_to_fp16 = const()[name = string("const_278_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_111_cast_fp16 = mul(x = var_3031_cast_fp16, y = const_278_to_fp16)[name = string("q_111_cast_fp16")];
+            tensor<int32, [4]> var_3037 = const()[name = string("op_3037"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3038_cast_fp16 = reshape(shape = var_3037, x = linear_163_cast_fp16)[name = string("op_3038_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_111_cast_fp16 = mul(x = var_3038_cast_fp16, y = const_279_to_fp16)[name = string("k_111_cast_fp16")];
+            tensor<int32, [4]> var_3044 = const()[name = string("op_3044"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3045_cast_fp16 = reshape(shape = var_3044, x = linear_164_cast_fp16)[name = string("op_3045_cast_fp16")];
+            tensor<int32, [4]> var_3046 = const()[name = string("op_3046"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)];
+            bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_87 = transpose(perm = transpose_87_perm_0, x = k_111_cast_fp16)[name = string("transpose_114")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = q_111_cast_fp16)[name = string("transpose_115")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_86, y = transpose_87)[name = string("qk_55_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3050_cast_fp16 = softmax(axis = var_2986, x = qk_55_cast_fp16)[name = string("op_3050_cast_fp16")];
+            bool var_3052_transpose_x_0 = const()[name = string("op_3052_transpose_x_0"), val = bool(false)];
+            bool var_3052_transpose_y_0 = const()[name = string("op_3052_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_111_cast_fp16 = transpose(perm = var_3046, x = var_3045_cast_fp16)[name = string("transpose_113")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3052_cast_fp16 = matmul(transpose_x = var_3052_transpose_x_0, transpose_y = var_3052_transpose_y_0, x = var_3050_cast_fp16, y = v_111_cast_fp16)[name = string("op_3052_cast_fp16")];
+            tensor<int32, [4]> var_3053 = const()[name = string("op_3053"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_27 = const()[name = string("concat_27"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3054_cast_fp16 = transpose(perm = var_3053, x = var_3052_cast_fp16)[name = string("transpose_112")];
+            tensor<fp16, [1, 1500, 1280]> x_335_cast_fp16 = reshape(shape = concat_27, x = var_3054_cast_fp16)[name = string("x_335_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3058_to_fp16 = const()[name = string("op_3058_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(442729856)))];
+            tensor<fp16, [1280]> var_3059_to_fp16 = const()[name = string("op_3059_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446006720)))];
+            tensor<fp16, [1, 1500, 1280]> linear_165_cast_fp16 = linear(bias = var_3059_to_fp16, weight = var_3058_to_fp16, x = x_335_cast_fp16)[name = string("linear_165_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_337_cast_fp16 = add(x = x_331_cast_fp16, y = linear_165_cast_fp16)[name = string("x_337_cast_fp16")];
+            tensor<int32, [1]> var_3066_axes_0 = const()[name = string("op_3066_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_27_mlp_ln_weight_to_fp16 = const()[name = string("blocks_27_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446009344)))];
+            tensor<fp16, [1280]> blocks_27_mlp_ln_bias_to_fp16 = const()[name = string("blocks_27_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446011968)))];
+            tensor<fp16, [1, 1500, 1280]> var_3066_cast_fp16 = layer_norm(axes = var_3066_axes_0, beta = blocks_27_mlp_ln_bias_to_fp16, epsilon = var_2992_to_fp16, gamma = blocks_27_mlp_ln_weight_to_fp16, x = x_337_cast_fp16)[name = string("op_3066_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3075_to_fp16 = const()[name = string("op_3075_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446014592)))];
+            tensor<fp16, [5120]> var_3076_to_fp16 = const()[name = string("op_3076_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(459121856)))];
+            tensor<fp16, [1, 1500, 5120]> linear_166_cast_fp16 = linear(bias = var_3076_to_fp16, weight = var_3075_to_fp16, x = var_3066_cast_fp16)[name = string("linear_166_cast_fp16")];
+            string x_341_mode_0 = const()[name = string("x_341_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_341_cast_fp16 = gelu(mode = x_341_mode_0, x = linear_166_cast_fp16)[name = string("x_341_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3081_to_fp16 = const()[name = string("op_3081_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(459132160)))];
+            tensor<fp16, [1280]> var_3082_to_fp16 = const()[name = string("op_3082_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472239424)))];
+            tensor<fp16, [1, 1500, 1280]> linear_167_cast_fp16 = linear(bias = var_3082_to_fp16, weight = var_3081_to_fp16, x = x_341_cast_fp16)[name = string("linear_167_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_343_cast_fp16 = add(x = x_337_cast_fp16, y = linear_167_cast_fp16)[name = string("x_343_cast_fp16")];
+            int32 var_3092 = const()[name = string("op_3092"), val = int32(-1)];
+            tensor<int32, [1]> var_3108_axes_0 = const()[name = string("op_3108_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472242048)))];
+            tensor<fp16, [1280]> blocks_28_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472244672)))];
+            fp16 var_3098_to_fp16 = const()[name = string("op_3098_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3108_cast_fp16 = layer_norm(axes = var_3108_axes_0, beta = blocks_28_attn_ln_bias_to_fp16, epsilon = var_3098_to_fp16, gamma = blocks_28_attn_ln_weight_to_fp16, x = x_343_cast_fp16)[name = string("op_3108_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3119_to_fp16 = const()[name = string("op_3119_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472247296)))];
+            tensor<fp16, [1280]> var_3120_to_fp16 = const()[name = string("op_3120_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(475524160)))];
+            tensor<fp16, [1, 1500, 1280]> linear_168_cast_fp16 = linear(bias = var_3120_to_fp16, weight = var_3119_to_fp16, x = var_3108_cast_fp16)[name = string("linear_168_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3123_to_fp16 = const()[name = string("op_3123_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(475526784)))];
+            tensor<fp16, [1, 1500, 1280]> linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3123_to_fp16, x = var_3108_cast_fp16)[name = string("linear_169_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3127_to_fp16 = const()[name = string("op_3127_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(478803648)))];
+            tensor<fp16, [1280]> var_3128_to_fp16 = const()[name = string("op_3128_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(482080512)))];
+            tensor<fp16, [1, 1500, 1280]> linear_170_cast_fp16 = linear(bias = var_3128_to_fp16, weight = var_3127_to_fp16, x = var_3108_cast_fp16)[name = string("linear_170_cast_fp16")];
+            tensor<int32, [4]> var_3136 = const()[name = string("op_3136"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3137_cast_fp16 = reshape(shape = var_3136, x = linear_168_cast_fp16)[name = string("op_3137_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_115_cast_fp16 = mul(x = var_3137_cast_fp16, y = const_280_to_fp16)[name = string("q_115_cast_fp16")];
+            tensor<int32, [4]> var_3143 = const()[name = string("op_3143"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3144_cast_fp16 = reshape(shape = var_3143, x = linear_169_cast_fp16)[name = string("op_3144_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_115_cast_fp16 = mul(x = var_3144_cast_fp16, y = const_281_to_fp16)[name = string("k_115_cast_fp16")];
+            tensor<int32, [4]> var_3150 = const()[name = string("op_3150"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3151_cast_fp16 = reshape(shape = var_3150, x = linear_170_cast_fp16)[name = string("op_3151_cast_fp16")];
+            tensor<int32, [4]> var_3152 = const()[name = string("op_3152"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_57_transpose_x_0 = const()[name = string("qk_57_transpose_x_0"), val = bool(false)];
+            bool qk_57_transpose_y_0 = const()[name = string("qk_57_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_89 = transpose(perm = transpose_89_perm_0, x = k_115_cast_fp16)[name = string("transpose_110")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = q_115_cast_fp16)[name = string("transpose_111")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_57_cast_fp16 = matmul(transpose_x = qk_57_transpose_x_0, transpose_y = qk_57_transpose_y_0, x = transpose_88, y = transpose_89)[name = string("qk_57_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3156_cast_fp16 = softmax(axis = var_3092, x = qk_57_cast_fp16)[name = string("op_3156_cast_fp16")];
+            bool var_3158_transpose_x_0 = const()[name = string("op_3158_transpose_x_0"), val = bool(false)];
+            bool var_3158_transpose_y_0 = const()[name = string("op_3158_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_115_cast_fp16 = transpose(perm = var_3152, x = var_3151_cast_fp16)[name = string("transpose_109")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3158_cast_fp16 = matmul(transpose_x = var_3158_transpose_x_0, transpose_y = var_3158_transpose_y_0, x = var_3156_cast_fp16, y = v_115_cast_fp16)[name = string("op_3158_cast_fp16")];
+            tensor<int32, [4]> var_3159 = const()[name = string("op_3159"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_28 = const()[name = string("concat_28"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3160_cast_fp16 = transpose(perm = var_3159, x = var_3158_cast_fp16)[name = string("transpose_108")];
+            tensor<fp16, [1, 1500, 1280]> x_347_cast_fp16 = reshape(shape = concat_28, x = var_3160_cast_fp16)[name = string("x_347_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(482083136)))];
+            tensor<fp16, [1280]> var_3165_to_fp16 = const()[name = string("op_3165_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485360000)))];
+            tensor<fp16, [1, 1500, 1280]> linear_171_cast_fp16 = linear(bias = var_3165_to_fp16, weight = var_3164_to_fp16, x = x_347_cast_fp16)[name = string("linear_171_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_349_cast_fp16 = add(x = x_343_cast_fp16, y = linear_171_cast_fp16)[name = string("x_349_cast_fp16")];
+            tensor<int32, [1]> var_3172_axes_0 = const()[name = string("op_3172_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_28_mlp_ln_weight_to_fp16 = const()[name = string("blocks_28_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485362624)))];
+            tensor<fp16, [1280]> blocks_28_mlp_ln_bias_to_fp16 = const()[name = string("blocks_28_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485365248)))];
+            tensor<fp16, [1, 1500, 1280]> var_3172_cast_fp16 = layer_norm(axes = var_3172_axes_0, beta = blocks_28_mlp_ln_bias_to_fp16, epsilon = var_3098_to_fp16, gamma = blocks_28_mlp_ln_weight_to_fp16, x = x_349_cast_fp16)[name = string("op_3172_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3181_to_fp16 = const()[name = string("op_3181_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485367872)))];
+            tensor<fp16, [5120]> var_3182_to_fp16 = const()[name = string("op_3182_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(498475136)))];
+            tensor<fp16, [1, 1500, 5120]> linear_172_cast_fp16 = linear(bias = var_3182_to_fp16, weight = var_3181_to_fp16, x = var_3172_cast_fp16)[name = string("linear_172_cast_fp16")];
+            string x_353_mode_0 = const()[name = string("x_353_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_353_cast_fp16 = gelu(mode = x_353_mode_0, x = linear_172_cast_fp16)[name = string("x_353_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3187_to_fp16 = const()[name = string("op_3187_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(498485440)))];
+            tensor<fp16, [1280]> var_3188_to_fp16 = const()[name = string("op_3188_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511592704)))];
+            tensor<fp16, [1, 1500, 1280]> linear_173_cast_fp16 = linear(bias = var_3188_to_fp16, weight = var_3187_to_fp16, x = x_353_cast_fp16)[name = string("linear_173_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_355_cast_fp16 = add(x = x_349_cast_fp16, y = linear_173_cast_fp16)[name = string("x_355_cast_fp16")];
+            int32 var_3198 = const()[name = string("op_3198"), val = int32(-1)];
+            tensor<int32, [1]> var_3214_axes_0 = const()[name = string("op_3214_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511595328)))];
+            tensor<fp16, [1280]> blocks_29_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511597952)))];
+            fp16 var_3204_to_fp16 = const()[name = string("op_3204_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3214_cast_fp16 = layer_norm(axes = var_3214_axes_0, beta = blocks_29_attn_ln_bias_to_fp16, epsilon = var_3204_to_fp16, gamma = blocks_29_attn_ln_weight_to_fp16, x = x_355_cast_fp16)[name = string("op_3214_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3225_to_fp16 = const()[name = string("op_3225_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511600576)))];
+            tensor<fp16, [1280]> var_3226_to_fp16 = const()[name = string("op_3226_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(514877440)))];
+            tensor<fp16, [1, 1500, 1280]> linear_174_cast_fp16 = linear(bias = var_3226_to_fp16, weight = var_3225_to_fp16, x = var_3214_cast_fp16)[name = string("linear_174_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3229_to_fp16 = const()[name = string("op_3229_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(514880064)))];
+            tensor<fp16, [1, 1500, 1280]> linear_175_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3229_to_fp16, x = var_3214_cast_fp16)[name = string("linear_175_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3233_to_fp16 = const()[name = string("op_3233_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(518156928)))];
+            tensor<fp16, [1280]> var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(521433792)))];
+            tensor<fp16, [1, 1500, 1280]> linear_176_cast_fp16 = linear(bias = var_3234_to_fp16, weight = var_3233_to_fp16, x = var_3214_cast_fp16)[name = string("linear_176_cast_fp16")];
+            tensor<int32, [4]> var_3242 = const()[name = string("op_3242"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3243_cast_fp16 = reshape(shape = var_3242, x = linear_174_cast_fp16)[name = string("op_3243_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_119_cast_fp16 = mul(x = var_3243_cast_fp16, y = const_282_to_fp16)[name = string("q_119_cast_fp16")];
+            tensor<int32, [4]> var_3249 = const()[name = string("op_3249"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3250_cast_fp16 = reshape(shape = var_3249, x = linear_175_cast_fp16)[name = string("op_3250_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_119_cast_fp16 = mul(x = var_3250_cast_fp16, y = const_283_to_fp16)[name = string("k_119_cast_fp16")];
+            tensor<int32, [4]> var_3256 = const()[name = string("op_3256"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3257_cast_fp16 = reshape(shape = var_3256, x = linear_176_cast_fp16)[name = string("op_3257_cast_fp16")];
+            tensor<int32, [4]> var_3258 = const()[name = string("op_3258"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)];
+            bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_91 = transpose(perm = transpose_91_perm_0, x = k_119_cast_fp16)[name = string("transpose_106")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = q_119_cast_fp16)[name = string("transpose_107")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_90, y = transpose_91)[name = string("qk_59_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3262_cast_fp16 = softmax(axis = var_3198, x = qk_59_cast_fp16)[name = string("op_3262_cast_fp16")];
+            bool var_3264_transpose_x_0 = const()[name = string("op_3264_transpose_x_0"), val = bool(false)];
+            bool var_3264_transpose_y_0 = const()[name = string("op_3264_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_119_cast_fp16 = transpose(perm = var_3258, x = var_3257_cast_fp16)[name = string("transpose_105")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3264_cast_fp16 = matmul(transpose_x = var_3264_transpose_x_0, transpose_y = var_3264_transpose_y_0, x = var_3262_cast_fp16, y = v_119_cast_fp16)[name = string("op_3264_cast_fp16")];
+            tensor<int32, [4]> var_3265 = const()[name = string("op_3265"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_29 = const()[name = string("concat_29"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3266_cast_fp16 = transpose(perm = var_3265, x = var_3264_cast_fp16)[name = string("transpose_104")];
+            tensor<fp16, [1, 1500, 1280]> x_359_cast_fp16 = reshape(shape = concat_29, x = var_3266_cast_fp16)[name = string("x_359_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(521436416)))];
+            tensor<fp16, [1280]> var_3271_to_fp16 = const()[name = string("op_3271_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524713280)))];
+            tensor<fp16, [1, 1500, 1280]> linear_177_cast_fp16 = linear(bias = var_3271_to_fp16, weight = var_3270_to_fp16, x = x_359_cast_fp16)[name = string("linear_177_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_361_cast_fp16 = add(x = x_355_cast_fp16, y = linear_177_cast_fp16)[name = string("x_361_cast_fp16")];
+            tensor<int32, [1]> var_3278_axes_0 = const()[name = string("op_3278_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_29_mlp_ln_weight_to_fp16 = const()[name = string("blocks_29_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524715904)))];
+            tensor<fp16, [1280]> blocks_29_mlp_ln_bias_to_fp16 = const()[name = string("blocks_29_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524718528)))];
+            tensor<fp16, [1, 1500, 1280]> var_3278_cast_fp16 = layer_norm(axes = var_3278_axes_0, beta = blocks_29_mlp_ln_bias_to_fp16, epsilon = var_3204_to_fp16, gamma = blocks_29_mlp_ln_weight_to_fp16, x = x_361_cast_fp16)[name = string("op_3278_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3287_to_fp16 = const()[name = string("op_3287_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524721152)))];
+            tensor<fp16, [5120]> var_3288_to_fp16 = const()[name = string("op_3288_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(537828416)))];
+            tensor<fp16, [1, 1500, 5120]> linear_178_cast_fp16 = linear(bias = var_3288_to_fp16, weight = var_3287_to_fp16, x = var_3278_cast_fp16)[name = string("linear_178_cast_fp16")];
+            string x_365_mode_0 = const()[name = string("x_365_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_365_cast_fp16 = gelu(mode = x_365_mode_0, x = linear_178_cast_fp16)[name = string("x_365_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3293_to_fp16 = const()[name = string("op_3293_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(537838720)))];
+            tensor<fp16, [1280]> var_3294_to_fp16 = const()[name = string("op_3294_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550945984)))];
+            tensor<fp16, [1, 1500, 1280]> linear_179_cast_fp16 = linear(bias = var_3294_to_fp16, weight = var_3293_to_fp16, x = x_365_cast_fp16)[name = string("linear_179_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_367_cast_fp16 = add(x = x_361_cast_fp16, y = linear_179_cast_fp16)[name = string("x_367_cast_fp16")];
+            int32 var_3304 = const()[name = string("op_3304"), val = int32(-1)];
+            tensor<int32, [1]> var_3320_axes_0 = const()[name = string("op_3320_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550948608)))];
+            tensor<fp16, [1280]> blocks_30_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550951232)))];
+            fp16 var_3310_to_fp16 = const()[name = string("op_3310_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3320_cast_fp16 = layer_norm(axes = var_3320_axes_0, beta = blocks_30_attn_ln_bias_to_fp16, epsilon = var_3310_to_fp16, gamma = blocks_30_attn_ln_weight_to_fp16, x = x_367_cast_fp16)[name = string("op_3320_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3331_to_fp16 = const()[name = string("op_3331_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550953856)))];
+            tensor<fp16, [1280]> var_3332_to_fp16 = const()[name = string("op_3332_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(554230720)))];
+            tensor<fp16, [1, 1500, 1280]> linear_180_cast_fp16 = linear(bias = var_3332_to_fp16, weight = var_3331_to_fp16, x = var_3320_cast_fp16)[name = string("linear_180_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3335_to_fp16 = const()[name = string("op_3335_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(554233344)))];
+            tensor<fp16, [1, 1500, 1280]> linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3335_to_fp16, x = var_3320_cast_fp16)[name = string("linear_181_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3339_to_fp16 = const()[name = string("op_3339_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(557510208)))];
+            tensor<fp16, [1280]> var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(560787072)))];
+            tensor<fp16, [1, 1500, 1280]> linear_182_cast_fp16 = linear(bias = var_3340_to_fp16, weight = var_3339_to_fp16, x = var_3320_cast_fp16)[name = string("linear_182_cast_fp16")];
+            tensor<int32, [4]> var_3348 = const()[name = string("op_3348"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3349_cast_fp16 = reshape(shape = var_3348, x = linear_180_cast_fp16)[name = string("op_3349_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_284_to_fp16 = const()[name = string("const_284_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_123_cast_fp16 = mul(x = var_3349_cast_fp16, y = const_284_to_fp16)[name = string("q_123_cast_fp16")];
+            tensor<int32, [4]> var_3355 = const()[name = string("op_3355"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3356_cast_fp16 = reshape(shape = var_3355, x = linear_181_cast_fp16)[name = string("op_3356_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_123_cast_fp16 = mul(x = var_3356_cast_fp16, y = const_285_to_fp16)[name = string("k_123_cast_fp16")];
+            tensor<int32, [4]> var_3362 = const()[name = string("op_3362"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3363_cast_fp16 = reshape(shape = var_3362, x = linear_182_cast_fp16)[name = string("op_3363_cast_fp16")];
+            tensor<int32, [4]> var_3364 = const()[name = string("op_3364"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)];
+            bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_93 = transpose(perm = transpose_93_perm_0, x = k_123_cast_fp16)[name = string("transpose_102")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = q_123_cast_fp16)[name = string("transpose_103")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_92, y = transpose_93)[name = string("qk_61_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3368_cast_fp16 = softmax(axis = var_3304, x = qk_61_cast_fp16)[name = string("op_3368_cast_fp16")];
+            bool var_3370_transpose_x_0 = const()[name = string("op_3370_transpose_x_0"), val = bool(false)];
+            bool var_3370_transpose_y_0 = const()[name = string("op_3370_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_123_cast_fp16 = transpose(perm = var_3364, x = var_3363_cast_fp16)[name = string("transpose_101")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3370_cast_fp16 = matmul(transpose_x = var_3370_transpose_x_0, transpose_y = var_3370_transpose_y_0, x = var_3368_cast_fp16, y = v_123_cast_fp16)[name = string("op_3370_cast_fp16")];
+            tensor<int32, [4]> var_3371 = const()[name = string("op_3371"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_30 = const()[name = string("concat_30"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3372_cast_fp16 = transpose(perm = var_3371, x = var_3370_cast_fp16)[name = string("transpose_100")];
+            tensor<fp16, [1, 1500, 1280]> x_371_cast_fp16 = reshape(shape = concat_30, x = var_3372_cast_fp16)[name = string("x_371_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3376_to_fp16 = const()[name = string("op_3376_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(560789696)))];
+            tensor<fp16, [1280]> var_3377_to_fp16 = const()[name = string("op_3377_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564066560)))];
+            tensor<fp16, [1, 1500, 1280]> linear_183_cast_fp16 = linear(bias = var_3377_to_fp16, weight = var_3376_to_fp16, x = x_371_cast_fp16)[name = string("linear_183_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_373_cast_fp16 = add(x = x_367_cast_fp16, y = linear_183_cast_fp16)[name = string("x_373_cast_fp16")];
+            tensor<int32, [1]> var_3384_axes_0 = const()[name = string("op_3384_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_30_mlp_ln_weight_to_fp16 = const()[name = string("blocks_30_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564069184)))];
+            tensor<fp16, [1280]> blocks_30_mlp_ln_bias_to_fp16 = const()[name = string("blocks_30_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564071808)))];
+            tensor<fp16, [1, 1500, 1280]> var_3384_cast_fp16 = layer_norm(axes = var_3384_axes_0, beta = blocks_30_mlp_ln_bias_to_fp16, epsilon = var_3310_to_fp16, gamma = blocks_30_mlp_ln_weight_to_fp16, x = x_373_cast_fp16)[name = string("op_3384_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3393_to_fp16 = const()[name = string("op_3393_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564074432)))];
+            tensor<fp16, [5120]> var_3394_to_fp16 = const()[name = string("op_3394_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(577181696)))];
+            tensor<fp16, [1, 1500, 5120]> linear_184_cast_fp16 = linear(bias = var_3394_to_fp16, weight = var_3393_to_fp16, x = var_3384_cast_fp16)[name = string("linear_184_cast_fp16")];
+            string x_377_mode_0 = const()[name = string("x_377_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_377_cast_fp16 = gelu(mode = x_377_mode_0, x = linear_184_cast_fp16)[name = string("x_377_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3399_to_fp16 = const()[name = string("op_3399_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(577192000)))];
+            tensor<fp16, [1280]> var_3400_to_fp16 = const()[name = string("op_3400_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590299264)))];
+            tensor<fp16, [1, 1500, 1280]> linear_185_cast_fp16 = linear(bias = var_3400_to_fp16, weight = var_3399_to_fp16, x = x_377_cast_fp16)[name = string("linear_185_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_379_cast_fp16 = add(x = x_373_cast_fp16, y = linear_185_cast_fp16)[name = string("x_379_cast_fp16")];
+            int32 var_3410 = const()[name = string("op_3410"), val = int32(-1)];
+            tensor<int32, [1]> var_3426_axes_0 = const()[name = string("op_3426_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_attn_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590301888)))];
+            tensor<fp16, [1280]> blocks_31_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_attn_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590304512)))];
+            fp16 var_3416_to_fp16 = const()[name = string("op_3416_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> var_3426_cast_fp16 = layer_norm(axes = var_3426_axes_0, beta = blocks_31_attn_ln_bias_to_fp16, epsilon = var_3416_to_fp16, gamma = blocks_31_attn_ln_weight_to_fp16, x = x_379_cast_fp16)[name = string("op_3426_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3437_to_fp16 = const()[name = string("op_3437_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590307136)))];
+            tensor<fp16, [1280]> var_3438_to_fp16 = const()[name = string("op_3438_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(593584000)))];
+            tensor<fp16, [1, 1500, 1280]> linear_186_cast_fp16 = linear(bias = var_3438_to_fp16, weight = var_3437_to_fp16, x = var_3426_cast_fp16)[name = string("linear_186_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3441_to_fp16 = const()[name = string("op_3441_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(593586624)))];
+            tensor<fp16, [1, 1500, 1280]> linear_187_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3441_to_fp16, x = var_3426_cast_fp16)[name = string("linear_187_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(596863488)))];
+            tensor<fp16, [1280]> var_3446_to_fp16 = const()[name = string("op_3446_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(600140352)))];
+            tensor<fp16, [1, 1500, 1280]> linear_188_cast_fp16 = linear(bias = var_3446_to_fp16, weight = var_3445_to_fp16, x = var_3426_cast_fp16)[name = string("linear_188_cast_fp16")];
+            tensor<int32, [4]> var_3454 = const()[name = string("op_3454"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3455_cast_fp16 = reshape(shape = var_3454, x = linear_186_cast_fp16)[name = string("op_3455_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_286_to_fp16 = const()[name = string("const_286_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> q_cast_fp16 = mul(x = var_3455_cast_fp16, y = const_286_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_3461 = const()[name = string("op_3461"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3462_cast_fp16 = reshape(shape = var_3461, x = linear_187_cast_fp16)[name = string("op_3462_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 20, 64]> k_cast_fp16 = mul(x = var_3462_cast_fp16, y = const_287_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_3468 = const()[name = string("op_3468"), val = tensor<int32, [4]>([1, 1500, 20, -1])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3469_cast_fp16 = reshape(shape = var_3468, x = linear_188_cast_fp16)[name = string("op_3469_cast_fp16")];
+            tensor<int32, [4]> var_3470 = const()[name = string("op_3470"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 20, 64, 1500]> transpose_95 = transpose(perm = transpose_95_perm_0, x = k_cast_fp16)[name = string("transpose_98")];
+            tensor<fp16, [1, 20, 1500, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = q_cast_fp16)[name = string("transpose_99")];
+            tensor<fp16, [1, 20, 1500, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_94, y = transpose_95)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3474_cast_fp16 = softmax(axis = var_3410, x = qk_cast_fp16)[name = string("op_3474_cast_fp16")];
+            bool var_3476_transpose_x_0 = const()[name = string("op_3476_transpose_x_0"), val = bool(false)];
+            bool var_3476_transpose_y_0 = const()[name = string("op_3476_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 64]> v_cast_fp16 = transpose(perm = var_3470, x = var_3469_cast_fp16)[name = string("transpose_97")];
+            tensor<fp16, [1, 20, 1500, 64]> var_3476_cast_fp16 = matmul(transpose_x = var_3476_transpose_x_0, transpose_y = var_3476_transpose_y_0, x = var_3474_cast_fp16, y = v_cast_fp16)[name = string("op_3476_cast_fp16")];
+            tensor<int32, [4]> var_3477 = const()[name = string("op_3477"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_31 = const()[name = string("concat_31"), val = tensor<int32, [3]>([1, 1500, 1280])];
+            tensor<fp16, [1, 1500, 20, 64]> var_3478_cast_fp16 = transpose(perm = var_3477, x = var_3476_cast_fp16)[name = string("transpose_96")];
+            tensor<fp16, [1, 1500, 1280]> x_383_cast_fp16 = reshape(shape = concat_31, x = var_3478_cast_fp16)[name = string("x_383_cast_fp16")];
+            tensor<fp16, [1280, 1280]> var_3482_to_fp16 = const()[name = string("op_3482_to_fp16"), val = tensor<fp16, [1280, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(600142976)))];
+            tensor<fp16, [1280]> var_3483_to_fp16 = const()[name = string("op_3483_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603419840)))];
+            tensor<fp16, [1, 1500, 1280]> linear_189_cast_fp16 = linear(bias = var_3483_to_fp16, weight = var_3482_to_fp16, x = x_383_cast_fp16)[name = string("linear_189_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_385_cast_fp16 = add(x = x_379_cast_fp16, y = linear_189_cast_fp16)[name = string("x_385_cast_fp16")];
+            tensor<int32, [1]> var_3490_axes_0 = const()[name = string("op_3490_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> blocks_31_mlp_ln_weight_to_fp16 = const()[name = string("blocks_31_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603422464)))];
+            tensor<fp16, [1280]> blocks_31_mlp_ln_bias_to_fp16 = const()[name = string("blocks_31_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603425088)))];
+            tensor<fp16, [1, 1500, 1280]> var_3490_cast_fp16 = layer_norm(axes = var_3490_axes_0, beta = blocks_31_mlp_ln_bias_to_fp16, epsilon = var_3416_to_fp16, gamma = blocks_31_mlp_ln_weight_to_fp16, x = x_385_cast_fp16)[name = string("op_3490_cast_fp16")];
+            tensor<fp16, [5120, 1280]> var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = tensor<fp16, [5120, 1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603427712)))];
+            tensor<fp16, [5120]> var_3500_to_fp16 = const()[name = string("op_3500_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(616534976)))];
+            tensor<fp16, [1, 1500, 5120]> linear_190_cast_fp16 = linear(bias = var_3500_to_fp16, weight = var_3499_to_fp16, x = var_3490_cast_fp16)[name = string("linear_190_cast_fp16")];
+            string x_389_mode_0 = const()[name = string("x_389_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 5120]> x_389_cast_fp16 = gelu(mode = x_389_mode_0, x = linear_190_cast_fp16)[name = string("x_389_cast_fp16")];
+            tensor<fp16, [1280, 5120]> var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor<fp16, [1280, 5120]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(616545280)))];
+            tensor<fp16, [1280]> var_3506_to_fp16 = const()[name = string("op_3506_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629652544)))];
+            tensor<fp16, [1, 1500, 1280]> linear_191_cast_fp16 = linear(bias = var_3506_to_fp16, weight = var_3505_to_fp16, x = x_389_cast_fp16)[name = string("linear_191_cast_fp16")];
+            tensor<fp16, [1, 1500, 1280]> x_cast_fp16 = add(x = x_385_cast_fp16, y = linear_191_cast_fp16)[name = string("x_cast_fp16")];
+            tensor<int32, [1]> var_3519_axes_0 = const()[name = string("op_3519_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1280]> ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629655168)))];
+            tensor<fp16, [1280]> ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629657792)))];
+            fp16 var_3510_to_fp16 = const()[name = string("op_3510_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1280]> output = layer_norm(axes = var_3519_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_3510_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_3519_cast_fp16")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/large-v3/encoder.mlmodelc/model1/weights/1-weight.bin b/large-v3/encoder.mlmodelc/model1/weights/1-weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d74dcc72eb085c49244201f90644c01049a0a54a
--- /dev/null
+++ b/large-v3/encoder.mlmodelc/model1/weights/1-weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f870130a04ffe31ab992466d0f4a0da493140364dae94a946e4fd5aaf80e8af
+size 629660416
diff --git a/large-v3/model_dims.json b/large-v3/model_dims.json
new file mode 100644
index 0000000000000000000000000000000000000000..4e5155a7f1c64dbc222d61edfe1e871c529efe4f
--- /dev/null
+++ b/large-v3/model_dims.json
@@ -0,0 +1,12 @@
+{
+  "n_mels": 128,
+  "n_audio_ctx": 1500,
+  "n_audio_state": 1280,
+  "n_audio_head": 20,
+  "n_audio_layer": 32,
+  "n_vocab": 51866,
+  "n_text_ctx": 448,
+  "n_text_state": 1280,
+  "n_text_head": 20,
+  "n_text_layer": 32
+}
\ No newline at end of file
diff --git a/medium/decoder_first.mlmodelc/analytics/coremldata.bin b/medium/decoder_first.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..346d4ba25ea5a3de75867619133a1dae47f5bd40
--- /dev/null
+++ b/medium/decoder_first.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88974e233d54396d36bcc56cb3529205db7703529c6fd653b711e4a0d45ccea8
+size 243
diff --git a/medium/decoder_first.mlmodelc/coremldata.bin b/medium/decoder_first.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..825710543896f917d3afd2630cd980008fa14610
--- /dev/null
+++ b/medium/decoder_first.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11f60bbbb075152da3db06cf9e036d38ae791a5c37ccb1530a22501166053edf
+size 453
diff --git a/medium/decoder_first.mlmodelc/metadata.json b/medium/decoder_first.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..69768ea5b31d5ef957c2d6853b3a4bc4f08ba7d5
--- /dev/null
+++ b/medium/decoder_first.mlmodelc/metadata.json
@@ -0,0 +1,106 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "dummy",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.writeState" : 50,
+      "Shape" : 48,
+      "Ios18.linear" : 48,
+      "Identity" : 1,
+      "Ios18.gather" : 48,
+      "Ios18.concat" : 48,
+      "Ios18.sliceUpdate" : 50,
+      "Ios18.cast" : 96,
+      "Ios18.expandDims" : 48,
+      "Ios18.readState" : 50
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 24 × 1 × 448 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[24, 1, 448, 1024]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 24 × 1 × 448 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[24, 1, 448, 1024]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 24 × 1 × 1500 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[24, 1, 1500, 1024]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 24 × 1 × 1500 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[24, 1, 1500, 1024]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...1500 × 1024",
+        "shapeRange" : "[[1, 1], [1, 1500], [1024, 1024]]",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1024)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1, 1024]",
+        "name" : "audio_data",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_first",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/medium/decoder_first.mlmodelc/model.mil b/medium/decoder_first.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..53c1f14bee99cd2905ac08f508a89178e0096a53
--- /dev/null
+++ b/medium/decoder_first.mlmodelc/model.mil
@@ -0,0 +1,1395 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, ?, 1024]> audio_data, state<tensor<fp16, [24, 1, 448, 1024]>> k_cache1, state<tensor<fp16, [24, 1, 1500, 1024]>> k_cache2, state<tensor<fp16, [24, 1, 448, 1024]>> v_cache1, state<tensor<fp16, [24, 1, 1500, 1024]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 1024]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [1024, 1024]]}})))] {
+            tensor<fp16, [1, ?, 1024]> dummy = identity(x = audio_data)[name = string("identity_0")];
+            tensor<fp16, [24, 1, 448, 1024]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor<fp16, [24, 1, 448, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_50_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_51_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<fp16, [24, 1, 1500, 1024]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<fp16, [1024, 1024]> var_115_to_fp16 = const()[name = string("op_115_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22020224)))];
+            tensor<fp16, [1024]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24117440)))];
+            tensor<fp16, [1, ?, 1024]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_115_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_119_to_fp16 = const()[name = string("op_119_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24119552)))];
+            tensor<fp16, [1024]> var_120_to_fp16 = const()[name = string("op_120_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26216768)))];
+            tensor<fp16, [1, ?, 1024]> linear_1_cast_fp16 = linear(bias = var_120_to_fp16, weight = var_119_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [3]> var_122_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_122_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_122_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_122_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [3]> var_122_shape_cast_fp16_to_int16 = cast(dtype = var_122_shape_cast_fp16_to_int16_dtype_0, x = var_122_shape_cast_fp16)[name = string("cast_151")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_122_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_150")];
+            tensor<int32, [1]> expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")];
+            tensor<int32, [4]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
+            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_52_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_52 = read_state(input = k_cache2)[name = string("coreml_update_state_52")];
+            tensor<int32, [3]> var_127_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_127_shape_cast_fp16")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_127_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_127_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_127_shape_cast_fp16_to_uint16 = cast(dtype = var_127_shape_cast_fp16_to_uint16_dtype_0, x = var_127_shape_cast_fp16)[name = string("cast_149")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_127_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_148")];
+            tensor<int32, [1]> expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")];
+            tensor<int32, [4]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)];
+            bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_53_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_53 = read_state(input = v_cache2)[name = string("coreml_update_state_53")];
+            tensor<fp16, [1024, 1024]> var_149_to_fp16 = const()[name = string("op_149_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26218880)))];
+            tensor<fp16, [1, ?, 1024]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_149_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_153_to_fp16 = const()[name = string("op_153_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28316096)))];
+            tensor<fp16, [1024]> var_154_to_fp16 = const()[name = string("op_154_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30413312)))];
+            tensor<fp16, [1, ?, 1024]> linear_3_cast_fp16 = linear(bias = var_154_to_fp16, weight = var_153_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")];
+            tensor<int32, [3]> var_156_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_156_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_156_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_156_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_156_shape_cast_fp16_to_uint16 = cast(dtype = var_156_shape_cast_fp16_to_uint16_dtype_0, x = var_156_shape_cast_fp16)[name = string("cast_147")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_156_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_146")];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")];
+            tensor<int32, [4]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)];
+            bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_52)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_54_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_54 = read_state(input = k_cache2)[name = string("coreml_update_state_54")];
+            tensor<int32, [3]> var_161_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_161_shape_cast_fp16")];
+            int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)];
+            int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)];
+            bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)];
+            string var_161_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_161_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_161_shape_cast_fp16_to_uint16 = cast(dtype = var_161_shape_cast_fp16_to_uint16_dtype_0, x = var_161_shape_cast_fp16)[name = string("cast_145")];
+            uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_161_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")];
+            string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_144")];
+            tensor<int32, [1]> expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")];
+            tensor<int32, [4]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_53)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_55_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_55 = read_state(input = v_cache2)[name = string("coreml_update_state_55")];
+            tensor<fp16, [1024, 1024]> var_183_to_fp16 = const()[name = string("op_183_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30415424)))];
+            tensor<fp16, [1, ?, 1024]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_183_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_187_to_fp16 = const()[name = string("op_187_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32512640)))];
+            tensor<fp16, [1024]> var_188_to_fp16 = const()[name = string("op_188_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34609856)))];
+            tensor<fp16, [1, ?, 1024]> linear_5_cast_fp16 = linear(bias = var_188_to_fp16, weight = var_187_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")];
+            tensor<int32, [3]> var_190_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_190_shape_cast_fp16")];
+            int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)];
+            int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)];
+            bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)];
+            string var_190_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_190_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_190_shape_cast_fp16_to_uint16 = cast(dtype = var_190_shape_cast_fp16_to_uint16_dtype_0, x = var_190_shape_cast_fp16)[name = string("cast_143")];
+            uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_190_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")];
+            string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_142")];
+            tensor<int32, [1]> expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")];
+            tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)];
+            bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_54)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_56_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_56 = read_state(input = k_cache2)[name = string("coreml_update_state_56")];
+            tensor<int32, [3]> var_195_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_195_shape_cast_fp16")];
+            int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)];
+            int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)];
+            bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)];
+            string var_195_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_195_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_195_shape_cast_fp16_to_uint16 = cast(dtype = var_195_shape_cast_fp16_to_uint16_dtype_0, x = var_195_shape_cast_fp16)[name = string("cast_141")];
+            uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_195_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")];
+            string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_140")];
+            tensor<int32, [1]> expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")];
+            tensor<int32, [4]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)];
+            bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_55)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_57_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_57 = read_state(input = v_cache2)[name = string("coreml_update_state_57")];
+            tensor<fp16, [1024, 1024]> var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34611968)))];
+            tensor<fp16, [1, ?, 1024]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_217_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_221_to_fp16 = const()[name = string("op_221_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36709184)))];
+            tensor<fp16, [1024]> var_222_to_fp16 = const()[name = string("op_222_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38806400)))];
+            tensor<fp16, [1, ?, 1024]> linear_7_cast_fp16 = linear(bias = var_222_to_fp16, weight = var_221_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")];
+            tensor<int32, [3]> var_224_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_224_shape_cast_fp16")];
+            int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)];
+            int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)];
+            bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)];
+            string var_224_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_224_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_224_shape_cast_fp16_to_uint16 = cast(dtype = var_224_shape_cast_fp16_to_uint16_dtype_0, x = var_224_shape_cast_fp16)[name = string("cast_139")];
+            uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_224_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")];
+            string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_138")];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")];
+            tensor<int32, [4]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)];
+            bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_56)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_58_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_58 = read_state(input = k_cache2)[name = string("coreml_update_state_58")];
+            tensor<int32, [3]> var_229_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_229_shape_cast_fp16")];
+            int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)];
+            int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)];
+            bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)];
+            string var_229_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_229_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_229_shape_cast_fp16_to_uint16 = cast(dtype = var_229_shape_cast_fp16_to_uint16_dtype_0, x = var_229_shape_cast_fp16)[name = string("cast_137")];
+            uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_229_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")];
+            string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_136")];
+            tensor<int32, [1]> expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")];
+            tensor<int32, [4]> concat_26 = const()[name = string("concat_26"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_57)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_59_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_59 = read_state(input = v_cache2)[name = string("coreml_update_state_59")];
+            tensor<fp16, [1024, 1024]> var_251_to_fp16 = const()[name = string("op_251_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38808512)))];
+            tensor<fp16, [1, ?, 1024]> linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_251_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_255_to_fp16 = const()[name = string("op_255_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40905728)))];
+            tensor<fp16, [1024]> var_256_to_fp16 = const()[name = string("op_256_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43002944)))];
+            tensor<fp16, [1, ?, 1024]> linear_9_cast_fp16 = linear(bias = var_256_to_fp16, weight = var_255_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")];
+            tensor<int32, [3]> var_258_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_258_shape_cast_fp16")];
+            int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)];
+            int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)];
+            bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)];
+            string var_258_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_258_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_258_shape_cast_fp16_to_uint16 = cast(dtype = var_258_shape_cast_fp16_to_uint16_dtype_0, x = var_258_shape_cast_fp16)[name = string("cast_135")];
+            uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_258_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")];
+            string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_134")];
+            tensor<int32, [1]> expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")];
+            tensor<int32, [4]> concat_29 = const()[name = string("concat_29"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)];
+            bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_58)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_60_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_60 = read_state(input = k_cache2)[name = string("coreml_update_state_60")];
+            tensor<int32, [3]> var_263_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_263_shape_cast_fp16")];
+            int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)];
+            int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)];
+            bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)];
+            string var_263_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_263_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_263_shape_cast_fp16_to_uint16 = cast(dtype = var_263_shape_cast_fp16_to_uint16_dtype_0, x = var_263_shape_cast_fp16)[name = string("cast_133")];
+            uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_263_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")];
+            string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_132")];
+            tensor<int32, [1]> expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")];
+            tensor<int32, [4]> concat_32 = const()[name = string("concat_32"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)];
+            bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_59)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_61_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_61 = read_state(input = v_cache2)[name = string("coreml_update_state_61")];
+            tensor<fp16, [1024, 1024]> var_285_to_fp16 = const()[name = string("op_285_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43005056)))];
+            tensor<fp16, [1, ?, 1024]> linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_285_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_289_to_fp16 = const()[name = string("op_289_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45102272)))];
+            tensor<fp16, [1024]> var_290_to_fp16 = const()[name = string("op_290_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47199488)))];
+            tensor<fp16, [1, ?, 1024]> linear_11_cast_fp16 = linear(bias = var_290_to_fp16, weight = var_289_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")];
+            tensor<int32, [3]> var_292_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_292_shape_cast_fp16")];
+            int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)];
+            int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)];
+            bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)];
+            string var_292_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_292_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_292_shape_cast_fp16_to_uint16 = cast(dtype = var_292_shape_cast_fp16_to_uint16_dtype_0, x = var_292_shape_cast_fp16)[name = string("cast_131")];
+            uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_292_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")];
+            string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_130")];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")];
+            tensor<int32, [4]> concat_35 = const()[name = string("concat_35"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)];
+            bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_60)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_62_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_62 = read_state(input = k_cache2)[name = string("coreml_update_state_62")];
+            tensor<int32, [3]> var_297_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_297_shape_cast_fp16")];
+            int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)];
+            int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)];
+            bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)];
+            string var_297_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_297_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_297_shape_cast_fp16_to_uint16 = cast(dtype = var_297_shape_cast_fp16_to_uint16_dtype_0, x = var_297_shape_cast_fp16)[name = string("cast_129")];
+            uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_297_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")];
+            string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_128")];
+            tensor<int32, [1]> expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")];
+            tensor<int32, [4]> concat_38 = const()[name = string("concat_38"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
+            bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_61)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_63_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_63 = read_state(input = v_cache2)[name = string("coreml_update_state_63")];
+            tensor<fp16, [1024, 1024]> var_319_to_fp16 = const()[name = string("op_319_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47201600)))];
+            tensor<fp16, [1, ?, 1024]> linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_319_to_fp16, x = audio_data)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_323_to_fp16 = const()[name = string("op_323_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49298816)))];
+            tensor<fp16, [1024]> var_324_to_fp16 = const()[name = string("op_324_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51396032)))];
+            tensor<fp16, [1, ?, 1024]> linear_13_cast_fp16 = linear(bias = var_324_to_fp16, weight = var_323_to_fp16, x = audio_data)[name = string("linear_13_cast_fp16")];
+            tensor<int32, [3]> var_326_shape_cast_fp16 = shape(x = linear_12_cast_fp16)[name = string("op_326_shape_cast_fp16")];
+            int32 gather_12_axis_0 = const()[name = string("gather_12_axis_0"), val = int32(0)];
+            int32 gather_12_batch_dims_0 = const()[name = string("gather_12_batch_dims_0"), val = int32(0)];
+            bool gather_12_validate_indices_0 = const()[name = string("gather_12_validate_indices_0"), val = bool(false)];
+            string var_326_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_326_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_12_to_uint16 = const()[name = string("select_12_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_326_shape_cast_fp16_to_uint16 = cast(dtype = var_326_shape_cast_fp16_to_uint16_dtype_0, x = var_326_shape_cast_fp16)[name = string("cast_127")];
+            uint16 gather_12_cast_uint16 = gather(axis = gather_12_axis_0, batch_dims = gather_12_batch_dims_0, indices = select_12_to_uint16, validate_indices = gather_12_validate_indices_0, x = var_326_shape_cast_fp16_to_uint16)[name = string("gather_12_cast_uint16")];
+            string gather_12_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_12_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_59_axes_0 = const()[name = string("expand_dims_59_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_12_cast_uint16_to_int32 = cast(dtype = gather_12_cast_uint16_to_int32_dtype_0, x = gather_12_cast_uint16)[name = string("cast_126")];
+            tensor<int32, [1]> expand_dims_59 = expand_dims(axes = expand_dims_59_axes_0, x = gather_12_cast_uint16_to_int32)[name = string("expand_dims_59")];
+            tensor<int32, [4]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [1]> concat_42_values0_0 = const()[name = string("concat_42_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_42_values1_0 = const()[name = string("concat_42_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_42_values3_0 = const()[name = string("concat_42_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)];
+            bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (concat_42_values0_0, concat_42_values1_0, expand_dims_59, concat_42_values3_0))[name = string("concat_42")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_41, begin_mask = k_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_42, end_mask = k_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_7_stride_0, update = linear_12_cast_fp16, x = coreml_update_state_62)[name = string("k_cache2_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_7_cast_fp16, input = k_cache2)[name = string("coreml_update_state_64_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_64 = read_state(input = k_cache2)[name = string("coreml_update_state_64")];
+            tensor<int32, [3]> var_331_shape_cast_fp16 = shape(x = linear_13_cast_fp16)[name = string("op_331_shape_cast_fp16")];
+            int32 gather_13_axis_0 = const()[name = string("gather_13_axis_0"), val = int32(0)];
+            int32 gather_13_batch_dims_0 = const()[name = string("gather_13_batch_dims_0"), val = int32(0)];
+            bool gather_13_validate_indices_0 = const()[name = string("gather_13_validate_indices_0"), val = bool(false)];
+            string var_331_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_331_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_13_to_uint16 = const()[name = string("select_13_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_331_shape_cast_fp16_to_uint16 = cast(dtype = var_331_shape_cast_fp16_to_uint16_dtype_0, x = var_331_shape_cast_fp16)[name = string("cast_125")];
+            uint16 gather_13_cast_uint16 = gather(axis = gather_13_axis_0, batch_dims = gather_13_batch_dims_0, indices = select_13_to_uint16, validate_indices = gather_13_validate_indices_0, x = var_331_shape_cast_fp16_to_uint16)[name = string("gather_13_cast_uint16")];
+            string gather_13_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_13_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_63_axes_0 = const()[name = string("expand_dims_63_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_13_cast_uint16_to_int32 = cast(dtype = gather_13_cast_uint16_to_int32_dtype_0, x = gather_13_cast_uint16)[name = string("cast_124")];
+            tensor<int32, [1]> expand_dims_63 = expand_dims(axes = expand_dims_63_axes_0, x = gather_13_cast_uint16_to_int32)[name = string("expand_dims_63")];
+            tensor<int32, [4]> concat_44 = const()[name = string("concat_44"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [1]> concat_45_values0_0 = const()[name = string("concat_45_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_45_values1_0 = const()[name = string("concat_45_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_45_values3_0 = const()[name = string("concat_45_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_45_axis_0 = const()[name = string("concat_45_axis_0"), val = int32(0)];
+            bool concat_45_interleave_0 = const()[name = string("concat_45_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_45 = concat(axis = concat_45_axis_0, interleave = concat_45_interleave_0, values = (concat_45_values0_0, concat_45_values1_0, expand_dims_63, concat_45_values3_0))[name = string("concat_45")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_44, begin_mask = v_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_45, end_mask = v_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_7_stride_0, update = linear_13_cast_fp16, x = coreml_update_state_63)[name = string("v_cache2_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_7_cast_fp16, input = v_cache2)[name = string("coreml_update_state_65_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_65 = read_state(input = v_cache2)[name = string("coreml_update_state_65")];
+            tensor<fp16, [1024, 1024]> var_353_to_fp16 = const()[name = string("op_353_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51398144)))];
+            tensor<fp16, [1, ?, 1024]> linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_353_to_fp16, x = audio_data)[name = string("linear_14_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_357_to_fp16 = const()[name = string("op_357_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53495360)))];
+            tensor<fp16, [1024]> var_358_to_fp16 = const()[name = string("op_358_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55592576)))];
+            tensor<fp16, [1, ?, 1024]> linear_15_cast_fp16 = linear(bias = var_358_to_fp16, weight = var_357_to_fp16, x = audio_data)[name = string("linear_15_cast_fp16")];
+            tensor<int32, [3]> var_360_shape_cast_fp16 = shape(x = linear_14_cast_fp16)[name = string("op_360_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_360_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_360_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_360_shape_cast_fp16_to_uint16 = cast(dtype = var_360_shape_cast_fp16_to_uint16_dtype_0, x = var_360_shape_cast_fp16)[name = string("cast_123")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_360_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_122")];
+            tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = gather_14_cast_uint16_to_int32)[name = string("expand_dims_67")];
+            tensor<int32, [4]> concat_47 = const()[name = string("concat_47"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_48_values1_0 = const()[name = string("concat_48_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_48_values3_0 = const()[name = string("concat_48_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, concat_48_values1_0, expand_dims_67, concat_48_values3_0))[name = string("concat_48")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_47, begin_mask = k_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_48, end_mask = k_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_8_stride_0, update = linear_14_cast_fp16, x = coreml_update_state_64)[name = string("k_cache2_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_8_cast_fp16, input = k_cache2)[name = string("coreml_update_state_66_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_66 = read_state(input = k_cache2)[name = string("coreml_update_state_66")];
+            tensor<int32, [3]> var_365_shape_cast_fp16 = shape(x = linear_15_cast_fp16)[name = string("op_365_shape_cast_fp16")];
+            int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)];
+            int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)];
+            bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)];
+            string var_365_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_365_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_365_shape_cast_fp16_to_uint16 = cast(dtype = var_365_shape_cast_fp16_to_uint16_dtype_0, x = var_365_shape_cast_fp16)[name = string("cast_121")];
+            uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_365_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")];
+            string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_71_axes_0 = const()[name = string("expand_dims_71_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_120")];
+            tensor<int32, [1]> expand_dims_71 = expand_dims(axes = expand_dims_71_axes_0, x = gather_15_cast_uint16_to_int32)[name = string("expand_dims_71")];
+            tensor<int32, [4]> concat_50 = const()[name = string("concat_50"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [1]> concat_51_values0_0 = const()[name = string("concat_51_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)];
+            bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (concat_51_values0_0, concat_51_values1_0, expand_dims_71, concat_51_values3_0))[name = string("concat_51")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_50, begin_mask = v_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_51, end_mask = v_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_8_stride_0, update = linear_15_cast_fp16, x = coreml_update_state_65)[name = string("v_cache2_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_8_cast_fp16, input = v_cache2)[name = string("coreml_update_state_67_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_67 = read_state(input = v_cache2)[name = string("coreml_update_state_67")];
+            tensor<fp16, [1024, 1024]> var_387_to_fp16 = const()[name = string("op_387_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55594688)))];
+            tensor<fp16, [1, ?, 1024]> linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_387_to_fp16, x = audio_data)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_391_to_fp16 = const()[name = string("op_391_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57691904)))];
+            tensor<fp16, [1024]> var_392_to_fp16 = const()[name = string("op_392_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59789120)))];
+            tensor<fp16, [1, ?, 1024]> linear_17_cast_fp16 = linear(bias = var_392_to_fp16, weight = var_391_to_fp16, x = audio_data)[name = string("linear_17_cast_fp16")];
+            tensor<int32, [3]> var_394_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_394_shape_cast_fp16")];
+            int32 gather_16_axis_0 = const()[name = string("gather_16_axis_0"), val = int32(0)];
+            int32 gather_16_batch_dims_0 = const()[name = string("gather_16_batch_dims_0"), val = int32(0)];
+            bool gather_16_validate_indices_0 = const()[name = string("gather_16_validate_indices_0"), val = bool(false)];
+            string var_394_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_394_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_16_to_uint16 = const()[name = string("select_16_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_394_shape_cast_fp16_to_uint16 = cast(dtype = var_394_shape_cast_fp16_to_uint16_dtype_0, x = var_394_shape_cast_fp16)[name = string("cast_119")];
+            uint16 gather_16_cast_uint16 = gather(axis = gather_16_axis_0, batch_dims = gather_16_batch_dims_0, indices = select_16_to_uint16, validate_indices = gather_16_validate_indices_0, x = var_394_shape_cast_fp16_to_uint16)[name = string("gather_16_cast_uint16")];
+            string gather_16_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_16_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_75_axes_0 = const()[name = string("expand_dims_75_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_16_cast_uint16_to_int32 = cast(dtype = gather_16_cast_uint16_to_int32_dtype_0, x = gather_16_cast_uint16)[name = string("cast_118")];
+            tensor<int32, [1]> expand_dims_75 = expand_dims(axes = expand_dims_75_axes_0, x = gather_16_cast_uint16_to_int32)[name = string("expand_dims_75")];
+            tensor<int32, [4]> concat_53 = const()[name = string("concat_53"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [1]> concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_54_values1_0 = const()[name = string("concat_54_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_54_values3_0 = const()[name = string("concat_54_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, concat_54_values1_0, expand_dims_75, concat_54_values3_0))[name = string("concat_54")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_53, begin_mask = k_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_54, end_mask = k_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_9_stride_0, update = linear_16_cast_fp16, x = coreml_update_state_66)[name = string("k_cache2_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_9_cast_fp16, input = k_cache2)[name = string("coreml_update_state_68_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_68 = read_state(input = k_cache2)[name = string("coreml_update_state_68")];
+            tensor<int32, [3]> var_399_shape_cast_fp16 = shape(x = linear_17_cast_fp16)[name = string("op_399_shape_cast_fp16")];
+            int32 gather_17_axis_0 = const()[name = string("gather_17_axis_0"), val = int32(0)];
+            int32 gather_17_batch_dims_0 = const()[name = string("gather_17_batch_dims_0"), val = int32(0)];
+            bool gather_17_validate_indices_0 = const()[name = string("gather_17_validate_indices_0"), val = bool(false)];
+            string var_399_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_399_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_17_to_uint16 = const()[name = string("select_17_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_399_shape_cast_fp16_to_uint16 = cast(dtype = var_399_shape_cast_fp16_to_uint16_dtype_0, x = var_399_shape_cast_fp16)[name = string("cast_117")];
+            uint16 gather_17_cast_uint16 = gather(axis = gather_17_axis_0, batch_dims = gather_17_batch_dims_0, indices = select_17_to_uint16, validate_indices = gather_17_validate_indices_0, x = var_399_shape_cast_fp16_to_uint16)[name = string("gather_17_cast_uint16")];
+            string gather_17_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_17_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_79_axes_0 = const()[name = string("expand_dims_79_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_17_cast_uint16_to_int32 = cast(dtype = gather_17_cast_uint16_to_int32_dtype_0, x = gather_17_cast_uint16)[name = string("cast_116")];
+            tensor<int32, [1]> expand_dims_79 = expand_dims(axes = expand_dims_79_axes_0, x = gather_17_cast_uint16_to_int32)[name = string("expand_dims_79")];
+            tensor<int32, [4]> concat_56 = const()[name = string("concat_56"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [1]> concat_57_values0_0 = const()[name = string("concat_57_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)];
+            bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (concat_57_values0_0, concat_57_values1_0, expand_dims_79, concat_57_values3_0))[name = string("concat_57")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = v_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = v_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_9_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_67)[name = string("v_cache2_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_9_cast_fp16, input = v_cache2)[name = string("coreml_update_state_69_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_69 = read_state(input = v_cache2)[name = string("coreml_update_state_69")];
+            tensor<fp16, [1024, 1024]> var_421_to_fp16 = const()[name = string("op_421_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59791232)))];
+            tensor<fp16, [1, ?, 1024]> linear_18_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_421_to_fp16, x = audio_data)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_425_to_fp16 = const()[name = string("op_425_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61888448)))];
+            tensor<fp16, [1024]> var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63985664)))];
+            tensor<fp16, [1, ?, 1024]> linear_19_cast_fp16 = linear(bias = var_426_to_fp16, weight = var_425_to_fp16, x = audio_data)[name = string("linear_19_cast_fp16")];
+            tensor<int32, [3]> var_428_shape_cast_fp16 = shape(x = linear_18_cast_fp16)[name = string("op_428_shape_cast_fp16")];
+            int32 gather_18_axis_0 = const()[name = string("gather_18_axis_0"), val = int32(0)];
+            int32 gather_18_batch_dims_0 = const()[name = string("gather_18_batch_dims_0"), val = int32(0)];
+            bool gather_18_validate_indices_0 = const()[name = string("gather_18_validate_indices_0"), val = bool(false)];
+            string var_428_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_428_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_18_to_uint16 = const()[name = string("select_18_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_428_shape_cast_fp16_to_uint16 = cast(dtype = var_428_shape_cast_fp16_to_uint16_dtype_0, x = var_428_shape_cast_fp16)[name = string("cast_115")];
+            uint16 gather_18_cast_uint16 = gather(axis = gather_18_axis_0, batch_dims = gather_18_batch_dims_0, indices = select_18_to_uint16, validate_indices = gather_18_validate_indices_0, x = var_428_shape_cast_fp16_to_uint16)[name = string("gather_18_cast_uint16")];
+            string gather_18_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_18_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_18_cast_uint16_to_int32 = cast(dtype = gather_18_cast_uint16_to_int32_dtype_0, x = gather_18_cast_uint16)[name = string("cast_114")];
+            tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = gather_18_cast_uint16_to_int32)[name = string("expand_dims_83")];
+            tensor<int32, [4]> concat_59 = const()[name = string("concat_59"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [1]> concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, concat_60_values1_0, expand_dims_83, concat_60_values3_0))[name = string("concat_60")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_59, begin_mask = k_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_60, end_mask = k_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_10_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_68)[name = string("k_cache2_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_10_cast_fp16, input = k_cache2)[name = string("coreml_update_state_70_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_70 = read_state(input = k_cache2)[name = string("coreml_update_state_70")];
+            tensor<int32, [3]> var_433_shape_cast_fp16 = shape(x = linear_19_cast_fp16)[name = string("op_433_shape_cast_fp16")];
+            int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)];
+            int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)];
+            bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)];
+            string var_433_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_433_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_433_shape_cast_fp16_to_uint16 = cast(dtype = var_433_shape_cast_fp16_to_uint16_dtype_0, x = var_433_shape_cast_fp16)[name = string("cast_113")];
+            uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_433_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")];
+            string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_87_axes_0 = const()[name = string("expand_dims_87_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_112")];
+            tensor<int32, [1]> expand_dims_87 = expand_dims(axes = expand_dims_87_axes_0, x = gather_19_cast_uint16_to_int32)[name = string("expand_dims_87")];
+            tensor<int32, [4]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [1]> concat_63_values0_0 = const()[name = string("concat_63_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)];
+            bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (concat_63_values0_0, concat_63_values1_0, expand_dims_87, concat_63_values3_0))[name = string("concat_63")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_62, begin_mask = v_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_63, end_mask = v_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_10_stride_0, update = linear_19_cast_fp16, x = coreml_update_state_69)[name = string("v_cache2_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_10_cast_fp16, input = v_cache2)[name = string("coreml_update_state_71_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_71 = read_state(input = v_cache2)[name = string("coreml_update_state_71")];
+            tensor<fp16, [1024, 1024]> var_455_to_fp16 = const()[name = string("op_455_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63987776)))];
+            tensor<fp16, [1, ?, 1024]> linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_455_to_fp16, x = audio_data)[name = string("linear_20_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_459_to_fp16 = const()[name = string("op_459_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66084992)))];
+            tensor<fp16, [1024]> var_460_to_fp16 = const()[name = string("op_460_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68182208)))];
+            tensor<fp16, [1, ?, 1024]> linear_21_cast_fp16 = linear(bias = var_460_to_fp16, weight = var_459_to_fp16, x = audio_data)[name = string("linear_21_cast_fp16")];
+            tensor<int32, [3]> var_462_shape_cast_fp16 = shape(x = linear_20_cast_fp16)[name = string("op_462_shape_cast_fp16")];
+            int32 gather_20_axis_0 = const()[name = string("gather_20_axis_0"), val = int32(0)];
+            int32 gather_20_batch_dims_0 = const()[name = string("gather_20_batch_dims_0"), val = int32(0)];
+            bool gather_20_validate_indices_0 = const()[name = string("gather_20_validate_indices_0"), val = bool(false)];
+            string var_462_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_462_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_20_to_uint16 = const()[name = string("select_20_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_462_shape_cast_fp16_to_uint16 = cast(dtype = var_462_shape_cast_fp16_to_uint16_dtype_0, x = var_462_shape_cast_fp16)[name = string("cast_111")];
+            uint16 gather_20_cast_uint16 = gather(axis = gather_20_axis_0, batch_dims = gather_20_batch_dims_0, indices = select_20_to_uint16, validate_indices = gather_20_validate_indices_0, x = var_462_shape_cast_fp16_to_uint16)[name = string("gather_20_cast_uint16")];
+            string gather_20_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_20_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_91_axes_0 = const()[name = string("expand_dims_91_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_20_cast_uint16_to_int32 = cast(dtype = gather_20_cast_uint16_to_int32_dtype_0, x = gather_20_cast_uint16)[name = string("cast_110")];
+            tensor<int32, [1]> expand_dims_91 = expand_dims(axes = expand_dims_91_axes_0, x = gather_20_cast_uint16_to_int32)[name = string("expand_dims_91")];
+            tensor<int32, [4]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [1]> concat_66_values0_0 = const()[name = string("concat_66_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_66_values1_0 = const()[name = string("concat_66_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_66_values3_0 = const()[name = string("concat_66_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)];
+            bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (concat_66_values0_0, concat_66_values1_0, expand_dims_91, concat_66_values3_0))[name = string("concat_66")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_65, begin_mask = k_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_66, end_mask = k_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_11_stride_0, update = linear_20_cast_fp16, x = coreml_update_state_70)[name = string("k_cache2_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_11_cast_fp16, input = k_cache2)[name = string("coreml_update_state_72_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_72 = read_state(input = k_cache2)[name = string("coreml_update_state_72")];
+            tensor<int32, [3]> var_467_shape_cast_fp16 = shape(x = linear_21_cast_fp16)[name = string("op_467_shape_cast_fp16")];
+            int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)];
+            int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)];
+            bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)];
+            string var_467_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_467_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_467_shape_cast_fp16_to_uint16 = cast(dtype = var_467_shape_cast_fp16_to_uint16_dtype_0, x = var_467_shape_cast_fp16)[name = string("cast_109")];
+            uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_467_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")];
+            string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_95_axes_0 = const()[name = string("expand_dims_95_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_108")];
+            tensor<int32, [1]> expand_dims_95 = expand_dims(axes = expand_dims_95_axes_0, x = gather_21_cast_uint16_to_int32)[name = string("expand_dims_95")];
+            tensor<int32, [4]> concat_68 = const()[name = string("concat_68"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [1]> concat_69_values0_0 = const()[name = string("concat_69_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_69_values1_0 = const()[name = string("concat_69_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_69_values3_0 = const()[name = string("concat_69_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_69_axis_0 = const()[name = string("concat_69_axis_0"), val = int32(0)];
+            bool concat_69_interleave_0 = const()[name = string("concat_69_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_69 = concat(axis = concat_69_axis_0, interleave = concat_69_interleave_0, values = (concat_69_values0_0, concat_69_values1_0, expand_dims_95, concat_69_values3_0))[name = string("concat_69")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_68, begin_mask = v_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_69, end_mask = v_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_11_stride_0, update = linear_21_cast_fp16, x = coreml_update_state_71)[name = string("v_cache2_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_11_cast_fp16, input = v_cache2)[name = string("coreml_update_state_73_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_73 = read_state(input = v_cache2)[name = string("coreml_update_state_73")];
+            tensor<fp16, [1024, 1024]> var_489_to_fp16 = const()[name = string("op_489_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68184320)))];
+            tensor<fp16, [1, ?, 1024]> linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_489_to_fp16, x = audio_data)[name = string("linear_22_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_493_to_fp16 = const()[name = string("op_493_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70281536)))];
+            tensor<fp16, [1024]> var_494_to_fp16 = const()[name = string("op_494_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72378752)))];
+            tensor<fp16, [1, ?, 1024]> linear_23_cast_fp16 = linear(bias = var_494_to_fp16, weight = var_493_to_fp16, x = audio_data)[name = string("linear_23_cast_fp16")];
+            tensor<int32, [3]> var_496_shape_cast_fp16 = shape(x = linear_22_cast_fp16)[name = string("op_496_shape_cast_fp16")];
+            int32 gather_22_axis_0 = const()[name = string("gather_22_axis_0"), val = int32(0)];
+            int32 gather_22_batch_dims_0 = const()[name = string("gather_22_batch_dims_0"), val = int32(0)];
+            bool gather_22_validate_indices_0 = const()[name = string("gather_22_validate_indices_0"), val = bool(false)];
+            string var_496_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_496_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_22_to_uint16 = const()[name = string("select_22_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_496_shape_cast_fp16_to_uint16 = cast(dtype = var_496_shape_cast_fp16_to_uint16_dtype_0, x = var_496_shape_cast_fp16)[name = string("cast_107")];
+            uint16 gather_22_cast_uint16 = gather(axis = gather_22_axis_0, batch_dims = gather_22_batch_dims_0, indices = select_22_to_uint16, validate_indices = gather_22_validate_indices_0, x = var_496_shape_cast_fp16_to_uint16)[name = string("gather_22_cast_uint16")];
+            string gather_22_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_22_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_22_cast_uint16_to_int32 = cast(dtype = gather_22_cast_uint16_to_int32_dtype_0, x = gather_22_cast_uint16)[name = string("cast_106")];
+            tensor<int32, [1]> expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = gather_22_cast_uint16_to_int32)[name = string("expand_dims_99")];
+            tensor<int32, [4]> concat_71 = const()[name = string("concat_71"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [1]> concat_72_values0_0 = const()[name = string("concat_72_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_72_values1_0 = const()[name = string("concat_72_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_72_values3_0 = const()[name = string("concat_72_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)];
+            bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (concat_72_values0_0, concat_72_values1_0, expand_dims_99, concat_72_values3_0))[name = string("concat_72")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_71, begin_mask = k_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_72, end_mask = k_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_12_stride_0, update = linear_22_cast_fp16, x = coreml_update_state_72)[name = string("k_cache2_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_12_cast_fp16, input = k_cache2)[name = string("coreml_update_state_74_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_74 = read_state(input = k_cache2)[name = string("coreml_update_state_74")];
+            tensor<int32, [3]> var_501_shape_cast_fp16 = shape(x = linear_23_cast_fp16)[name = string("op_501_shape_cast_fp16")];
+            int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)];
+            int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)];
+            bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)];
+            string var_501_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_501_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_501_shape_cast_fp16_to_uint16 = cast(dtype = var_501_shape_cast_fp16_to_uint16_dtype_0, x = var_501_shape_cast_fp16)[name = string("cast_105")];
+            uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_501_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")];
+            string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_103_axes_0 = const()[name = string("expand_dims_103_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_104")];
+            tensor<int32, [1]> expand_dims_103 = expand_dims(axes = expand_dims_103_axes_0, x = gather_23_cast_uint16_to_int32)[name = string("expand_dims_103")];
+            tensor<int32, [4]> concat_74 = const()[name = string("concat_74"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [1]> concat_75_values0_0 = const()[name = string("concat_75_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)];
+            bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (concat_75_values0_0, concat_75_values1_0, expand_dims_103, concat_75_values3_0))[name = string("concat_75")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_74, begin_mask = v_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_75, end_mask = v_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_12_stride_0, update = linear_23_cast_fp16, x = coreml_update_state_73)[name = string("v_cache2_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_12_cast_fp16, input = v_cache2)[name = string("coreml_update_state_75_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_75 = read_state(input = v_cache2)[name = string("coreml_update_state_75")];
+            tensor<fp16, [1024, 1024]> var_523_to_fp16 = const()[name = string("op_523_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72380864)))];
+            tensor<fp16, [1, ?, 1024]> linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_523_to_fp16, x = audio_data)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_527_to_fp16 = const()[name = string("op_527_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74478080)))];
+            tensor<fp16, [1024]> var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76575296)))];
+            tensor<fp16, [1, ?, 1024]> linear_25_cast_fp16 = linear(bias = var_528_to_fp16, weight = var_527_to_fp16, x = audio_data)[name = string("linear_25_cast_fp16")];
+            tensor<int32, [3]> var_530_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_530_shape_cast_fp16")];
+            int32 gather_24_axis_0 = const()[name = string("gather_24_axis_0"), val = int32(0)];
+            int32 gather_24_batch_dims_0 = const()[name = string("gather_24_batch_dims_0"), val = int32(0)];
+            bool gather_24_validate_indices_0 = const()[name = string("gather_24_validate_indices_0"), val = bool(false)];
+            string var_530_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_530_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_24_to_uint16 = const()[name = string("select_24_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_530_shape_cast_fp16_to_uint16 = cast(dtype = var_530_shape_cast_fp16_to_uint16_dtype_0, x = var_530_shape_cast_fp16)[name = string("cast_103")];
+            uint16 gather_24_cast_uint16 = gather(axis = gather_24_axis_0, batch_dims = gather_24_batch_dims_0, indices = select_24_to_uint16, validate_indices = gather_24_validate_indices_0, x = var_530_shape_cast_fp16_to_uint16)[name = string("gather_24_cast_uint16")];
+            string gather_24_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_24_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_107_axes_0 = const()[name = string("expand_dims_107_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_24_cast_uint16_to_int32 = cast(dtype = gather_24_cast_uint16_to_int32_dtype_0, x = gather_24_cast_uint16)[name = string("cast_102")];
+            tensor<int32, [1]> expand_dims_107 = expand_dims(axes = expand_dims_107_axes_0, x = gather_24_cast_uint16_to_int32)[name = string("expand_dims_107")];
+            tensor<int32, [4]> concat_77 = const()[name = string("concat_77"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [1]> concat_78_values0_0 = const()[name = string("concat_78_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_78_values1_0 = const()[name = string("concat_78_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_78_values3_0 = const()[name = string("concat_78_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)];
+            bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (concat_78_values0_0, concat_78_values1_0, expand_dims_107, concat_78_values3_0))[name = string("concat_78")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_77, begin_mask = k_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_78, end_mask = k_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_13_stride_0, update = linear_24_cast_fp16, x = coreml_update_state_74)[name = string("k_cache2_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_13_cast_fp16, input = k_cache2)[name = string("coreml_update_state_76_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_76 = read_state(input = k_cache2)[name = string("coreml_update_state_76")];
+            tensor<int32, [3]> var_535_shape_cast_fp16 = shape(x = linear_25_cast_fp16)[name = string("op_535_shape_cast_fp16")];
+            int32 gather_25_axis_0 = const()[name = string("gather_25_axis_0"), val = int32(0)];
+            int32 gather_25_batch_dims_0 = const()[name = string("gather_25_batch_dims_0"), val = int32(0)];
+            bool gather_25_validate_indices_0 = const()[name = string("gather_25_validate_indices_0"), val = bool(false)];
+            string var_535_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_535_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_25_to_uint16 = const()[name = string("select_25_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_535_shape_cast_fp16_to_uint16 = cast(dtype = var_535_shape_cast_fp16_to_uint16_dtype_0, x = var_535_shape_cast_fp16)[name = string("cast_101")];
+            uint16 gather_25_cast_uint16 = gather(axis = gather_25_axis_0, batch_dims = gather_25_batch_dims_0, indices = select_25_to_uint16, validate_indices = gather_25_validate_indices_0, x = var_535_shape_cast_fp16_to_uint16)[name = string("gather_25_cast_uint16")];
+            string gather_25_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_25_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_111_axes_0 = const()[name = string("expand_dims_111_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_25_cast_uint16_to_int32 = cast(dtype = gather_25_cast_uint16_to_int32_dtype_0, x = gather_25_cast_uint16)[name = string("cast_100")];
+            tensor<int32, [1]> expand_dims_111 = expand_dims(axes = expand_dims_111_axes_0, x = gather_25_cast_uint16_to_int32)[name = string("expand_dims_111")];
+            tensor<int32, [4]> concat_80 = const()[name = string("concat_80"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [1]> concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_81_values3_0 = const()[name = string("concat_81_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
+            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, concat_81_values1_0, expand_dims_111, concat_81_values3_0))[name = string("concat_81")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_80, begin_mask = v_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_81, end_mask = v_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_13_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_75)[name = string("v_cache2_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_13_cast_fp16, input = v_cache2)[name = string("coreml_update_state_77_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_77 = read_state(input = v_cache2)[name = string("coreml_update_state_77")];
+            tensor<fp16, [1024, 1024]> var_557_to_fp16 = const()[name = string("op_557_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76577408)))];
+            tensor<fp16, [1, ?, 1024]> linear_26_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_557_to_fp16, x = audio_data)[name = string("linear_26_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_561_to_fp16 = const()[name = string("op_561_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78674624)))];
+            tensor<fp16, [1024]> var_562_to_fp16 = const()[name = string("op_562_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80771840)))];
+            tensor<fp16, [1, ?, 1024]> linear_27_cast_fp16 = linear(bias = var_562_to_fp16, weight = var_561_to_fp16, x = audio_data)[name = string("linear_27_cast_fp16")];
+            tensor<int32, [3]> var_564_shape_cast_fp16 = shape(x = linear_26_cast_fp16)[name = string("op_564_shape_cast_fp16")];
+            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
+            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
+            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
+            string var_564_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_564_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_564_shape_cast_fp16_to_uint16 = cast(dtype = var_564_shape_cast_fp16_to_uint16_dtype_0, x = var_564_shape_cast_fp16)[name = string("cast_99")];
+            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_564_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
+            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_98")];
+            tensor<int32, [1]> expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = gather_26_cast_uint16_to_int32)[name = string("expand_dims_115")];
+            tensor<int32, [4]> concat_83 = const()[name = string("concat_83"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [1]> concat_84_values0_0 = const()[name = string("concat_84_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)];
+            bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (concat_84_values0_0, concat_84_values1_0, expand_dims_115, concat_84_values3_0))[name = string("concat_84")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_83, begin_mask = k_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_84, end_mask = k_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_14_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_76)[name = string("k_cache2_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_14_cast_fp16, input = k_cache2)[name = string("coreml_update_state_78_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_78 = read_state(input = k_cache2)[name = string("coreml_update_state_78")];
+            tensor<int32, [3]> var_569_shape_cast_fp16 = shape(x = linear_27_cast_fp16)[name = string("op_569_shape_cast_fp16")];
+            int32 gather_27_axis_0 = const()[name = string("gather_27_axis_0"), val = int32(0)];
+            int32 gather_27_batch_dims_0 = const()[name = string("gather_27_batch_dims_0"), val = int32(0)];
+            bool gather_27_validate_indices_0 = const()[name = string("gather_27_validate_indices_0"), val = bool(false)];
+            string var_569_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_569_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_27_to_uint16 = const()[name = string("select_27_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_569_shape_cast_fp16_to_uint16 = cast(dtype = var_569_shape_cast_fp16_to_uint16_dtype_0, x = var_569_shape_cast_fp16)[name = string("cast_97")];
+            uint16 gather_27_cast_uint16 = gather(axis = gather_27_axis_0, batch_dims = gather_27_batch_dims_0, indices = select_27_to_uint16, validate_indices = gather_27_validate_indices_0, x = var_569_shape_cast_fp16_to_uint16)[name = string("gather_27_cast_uint16")];
+            string gather_27_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_27_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_119_axes_0 = const()[name = string("expand_dims_119_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_27_cast_uint16_to_int32 = cast(dtype = gather_27_cast_uint16_to_int32_dtype_0, x = gather_27_cast_uint16)[name = string("cast_96")];
+            tensor<int32, [1]> expand_dims_119 = expand_dims(axes = expand_dims_119_axes_0, x = gather_27_cast_uint16_to_int32)[name = string("expand_dims_119")];
+            tensor<int32, [4]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [1]> concat_87_values0_0 = const()[name = string("concat_87_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)];
+            bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (concat_87_values0_0, concat_87_values1_0, expand_dims_119, concat_87_values3_0))[name = string("concat_87")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_87, end_mask = v_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_14_stride_0, update = linear_27_cast_fp16, x = coreml_update_state_77)[name = string("v_cache2_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_14_cast_fp16, input = v_cache2)[name = string("coreml_update_state_79_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_79 = read_state(input = v_cache2)[name = string("coreml_update_state_79")];
+            tensor<fp16, [1024, 1024]> var_591_to_fp16 = const()[name = string("op_591_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80773952)))];
+            tensor<fp16, [1, ?, 1024]> linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_591_to_fp16, x = audio_data)[name = string("linear_28_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_595_to_fp16 = const()[name = string("op_595_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82871168)))];
+            tensor<fp16, [1024]> var_596_to_fp16 = const()[name = string("op_596_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84968384)))];
+            tensor<fp16, [1, ?, 1024]> linear_29_cast_fp16 = linear(bias = var_596_to_fp16, weight = var_595_to_fp16, x = audio_data)[name = string("linear_29_cast_fp16")];
+            tensor<int32, [3]> var_598_shape_cast_fp16 = shape(x = linear_28_cast_fp16)[name = string("op_598_shape_cast_fp16")];
+            int32 gather_28_axis_0 = const()[name = string("gather_28_axis_0"), val = int32(0)];
+            int32 gather_28_batch_dims_0 = const()[name = string("gather_28_batch_dims_0"), val = int32(0)];
+            bool gather_28_validate_indices_0 = const()[name = string("gather_28_validate_indices_0"), val = bool(false)];
+            string var_598_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_598_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_28_to_uint16 = const()[name = string("select_28_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_598_shape_cast_fp16_to_uint16 = cast(dtype = var_598_shape_cast_fp16_to_uint16_dtype_0, x = var_598_shape_cast_fp16)[name = string("cast_95")];
+            uint16 gather_28_cast_uint16 = gather(axis = gather_28_axis_0, batch_dims = gather_28_batch_dims_0, indices = select_28_to_uint16, validate_indices = gather_28_validate_indices_0, x = var_598_shape_cast_fp16_to_uint16)[name = string("gather_28_cast_uint16")];
+            string gather_28_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_28_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_123_axes_0 = const()[name = string("expand_dims_123_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_28_cast_uint16_to_int32 = cast(dtype = gather_28_cast_uint16_to_int32_dtype_0, x = gather_28_cast_uint16)[name = string("cast_94")];
+            tensor<int32, [1]> expand_dims_123 = expand_dims(axes = expand_dims_123_axes_0, x = gather_28_cast_uint16_to_int32)[name = string("expand_dims_123")];
+            tensor<int32, [4]> concat_89 = const()[name = string("concat_89"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [1]> concat_90_values0_0 = const()[name = string("concat_90_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_90_values1_0 = const()[name = string("concat_90_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_90_values3_0 = const()[name = string("concat_90_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)];
+            bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (concat_90_values0_0, concat_90_values1_0, expand_dims_123, concat_90_values3_0))[name = string("concat_90")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_89, begin_mask = k_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_90, end_mask = k_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_15_stride_0, update = linear_28_cast_fp16, x = coreml_update_state_78)[name = string("k_cache2_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_15_cast_fp16, input = k_cache2)[name = string("coreml_update_state_80_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_80 = read_state(input = k_cache2)[name = string("coreml_update_state_80")];
+            tensor<int32, [3]> var_603_shape_cast_fp16 = shape(x = linear_29_cast_fp16)[name = string("op_603_shape_cast_fp16")];
+            int32 gather_29_axis_0 = const()[name = string("gather_29_axis_0"), val = int32(0)];
+            int32 gather_29_batch_dims_0 = const()[name = string("gather_29_batch_dims_0"), val = int32(0)];
+            bool gather_29_validate_indices_0 = const()[name = string("gather_29_validate_indices_0"), val = bool(false)];
+            string var_603_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_603_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_29_to_uint16 = const()[name = string("select_29_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_603_shape_cast_fp16_to_uint16 = cast(dtype = var_603_shape_cast_fp16_to_uint16_dtype_0, x = var_603_shape_cast_fp16)[name = string("cast_93")];
+            uint16 gather_29_cast_uint16 = gather(axis = gather_29_axis_0, batch_dims = gather_29_batch_dims_0, indices = select_29_to_uint16, validate_indices = gather_29_validate_indices_0, x = var_603_shape_cast_fp16_to_uint16)[name = string("gather_29_cast_uint16")];
+            string gather_29_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_29_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_127_axes_0 = const()[name = string("expand_dims_127_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_29_cast_uint16_to_int32 = cast(dtype = gather_29_cast_uint16_to_int32_dtype_0, x = gather_29_cast_uint16)[name = string("cast_92")];
+            tensor<int32, [1]> expand_dims_127 = expand_dims(axes = expand_dims_127_axes_0, x = gather_29_cast_uint16_to_int32)[name = string("expand_dims_127")];
+            tensor<int32, [4]> concat_92 = const()[name = string("concat_92"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [1]> concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
+            bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_127, concat_93_values3_0))[name = string("concat_93")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_93, end_mask = v_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_15_stride_0, update = linear_29_cast_fp16, x = coreml_update_state_79)[name = string("v_cache2_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_15_cast_fp16, input = v_cache2)[name = string("coreml_update_state_81_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_81 = read_state(input = v_cache2)[name = string("coreml_update_state_81")];
+            tensor<fp16, [1024, 1024]> var_625_to_fp16 = const()[name = string("op_625_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84970496)))];
+            tensor<fp16, [1, ?, 1024]> linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_625_to_fp16, x = audio_data)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_629_to_fp16 = const()[name = string("op_629_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87067712)))];
+            tensor<fp16, [1024]> var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89164928)))];
+            tensor<fp16, [1, ?, 1024]> linear_31_cast_fp16 = linear(bias = var_630_to_fp16, weight = var_629_to_fp16, x = audio_data)[name = string("linear_31_cast_fp16")];
+            tensor<int32, [3]> var_632_shape_cast_fp16 = shape(x = linear_30_cast_fp16)[name = string("op_632_shape_cast_fp16")];
+            int32 gather_30_axis_0 = const()[name = string("gather_30_axis_0"), val = int32(0)];
+            int32 gather_30_batch_dims_0 = const()[name = string("gather_30_batch_dims_0"), val = int32(0)];
+            bool gather_30_validate_indices_0 = const()[name = string("gather_30_validate_indices_0"), val = bool(false)];
+            string var_632_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_632_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_30_to_uint16 = const()[name = string("select_30_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_632_shape_cast_fp16_to_uint16 = cast(dtype = var_632_shape_cast_fp16_to_uint16_dtype_0, x = var_632_shape_cast_fp16)[name = string("cast_91")];
+            uint16 gather_30_cast_uint16 = gather(axis = gather_30_axis_0, batch_dims = gather_30_batch_dims_0, indices = select_30_to_uint16, validate_indices = gather_30_validate_indices_0, x = var_632_shape_cast_fp16_to_uint16)[name = string("gather_30_cast_uint16")];
+            string gather_30_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_30_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_30_cast_uint16_to_int32 = cast(dtype = gather_30_cast_uint16_to_int32_dtype_0, x = gather_30_cast_uint16)[name = string("cast_90")];
+            tensor<int32, [1]> expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = gather_30_cast_uint16_to_int32)[name = string("expand_dims_131")];
+            tensor<int32, [4]> concat_95 = const()[name = string("concat_95"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [1]> concat_96_values0_0 = const()[name = string("concat_96_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_96_values1_0 = const()[name = string("concat_96_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_96_values3_0 = const()[name = string("concat_96_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)];
+            bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (concat_96_values0_0, concat_96_values1_0, expand_dims_131, concat_96_values3_0))[name = string("concat_96")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_95, begin_mask = k_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_96, end_mask = k_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_16_stride_0, update = linear_30_cast_fp16, x = coreml_update_state_80)[name = string("k_cache2_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_16_cast_fp16, input = k_cache2)[name = string("coreml_update_state_82_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_82 = read_state(input = k_cache2)[name = string("coreml_update_state_82")];
+            tensor<int32, [3]> var_637_shape_cast_fp16 = shape(x = linear_31_cast_fp16)[name = string("op_637_shape_cast_fp16")];
+            int32 gather_31_axis_0 = const()[name = string("gather_31_axis_0"), val = int32(0)];
+            int32 gather_31_batch_dims_0 = const()[name = string("gather_31_batch_dims_0"), val = int32(0)];
+            bool gather_31_validate_indices_0 = const()[name = string("gather_31_validate_indices_0"), val = bool(false)];
+            string var_637_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_637_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_31_to_uint16 = const()[name = string("select_31_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_637_shape_cast_fp16_to_uint16 = cast(dtype = var_637_shape_cast_fp16_to_uint16_dtype_0, x = var_637_shape_cast_fp16)[name = string("cast_89")];
+            uint16 gather_31_cast_uint16 = gather(axis = gather_31_axis_0, batch_dims = gather_31_batch_dims_0, indices = select_31_to_uint16, validate_indices = gather_31_validate_indices_0, x = var_637_shape_cast_fp16_to_uint16)[name = string("gather_31_cast_uint16")];
+            string gather_31_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_31_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_135_axes_0 = const()[name = string("expand_dims_135_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_31_cast_uint16_to_int32 = cast(dtype = gather_31_cast_uint16_to_int32_dtype_0, x = gather_31_cast_uint16)[name = string("cast_88")];
+            tensor<int32, [1]> expand_dims_135 = expand_dims(axes = expand_dims_135_axes_0, x = gather_31_cast_uint16_to_int32)[name = string("expand_dims_135")];
+            tensor<int32, [4]> concat_98 = const()[name = string("concat_98"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [1]> concat_99_values0_0 = const()[name = string("concat_99_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)];
+            bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (concat_99_values0_0, concat_99_values1_0, expand_dims_135, concat_99_values3_0))[name = string("concat_99")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_98, begin_mask = v_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_99, end_mask = v_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_16_stride_0, update = linear_31_cast_fp16, x = coreml_update_state_81)[name = string("v_cache2_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_16_cast_fp16, input = v_cache2)[name = string("coreml_update_state_83_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_83 = read_state(input = v_cache2)[name = string("coreml_update_state_83")];
+            tensor<fp16, [1024, 1024]> var_659_to_fp16 = const()[name = string("op_659_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89167040)))];
+            tensor<fp16, [1, ?, 1024]> linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_659_to_fp16, x = audio_data)[name = string("linear_32_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_663_to_fp16 = const()[name = string("op_663_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91264256)))];
+            tensor<fp16, [1024]> var_664_to_fp16 = const()[name = string("op_664_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93361472)))];
+            tensor<fp16, [1, ?, 1024]> linear_33_cast_fp16 = linear(bias = var_664_to_fp16, weight = var_663_to_fp16, x = audio_data)[name = string("linear_33_cast_fp16")];
+            tensor<int32, [3]> var_666_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_666_shape_cast_fp16")];
+            int32 gather_32_axis_0 = const()[name = string("gather_32_axis_0"), val = int32(0)];
+            int32 gather_32_batch_dims_0 = const()[name = string("gather_32_batch_dims_0"), val = int32(0)];
+            bool gather_32_validate_indices_0 = const()[name = string("gather_32_validate_indices_0"), val = bool(false)];
+            string var_666_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_666_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_32_to_uint16 = const()[name = string("select_32_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_666_shape_cast_fp16_to_uint16 = cast(dtype = var_666_shape_cast_fp16_to_uint16_dtype_0, x = var_666_shape_cast_fp16)[name = string("cast_87")];
+            uint16 gather_32_cast_uint16 = gather(axis = gather_32_axis_0, batch_dims = gather_32_batch_dims_0, indices = select_32_to_uint16, validate_indices = gather_32_validate_indices_0, x = var_666_shape_cast_fp16_to_uint16)[name = string("gather_32_cast_uint16")];
+            string gather_32_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_32_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_139_axes_0 = const()[name = string("expand_dims_139_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_32_cast_uint16_to_int32 = cast(dtype = gather_32_cast_uint16_to_int32_dtype_0, x = gather_32_cast_uint16)[name = string("cast_86")];
+            tensor<int32, [1]> expand_dims_139 = expand_dims(axes = expand_dims_139_axes_0, x = gather_32_cast_uint16_to_int32)[name = string("expand_dims_139")];
+            tensor<int32, [4]> concat_101 = const()[name = string("concat_101"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [1]> concat_102_values0_0 = const()[name = string("concat_102_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_102_values1_0 = const()[name = string("concat_102_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_102_values3_0 = const()[name = string("concat_102_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)];
+            bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (concat_102_values0_0, concat_102_values1_0, expand_dims_139, concat_102_values3_0))[name = string("concat_102")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_101, begin_mask = k_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_102, end_mask = k_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_17_stride_0, update = linear_32_cast_fp16, x = coreml_update_state_82)[name = string("k_cache2_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_17_cast_fp16, input = k_cache2)[name = string("coreml_update_state_84_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_84 = read_state(input = k_cache2)[name = string("coreml_update_state_84")];
+            tensor<int32, [3]> var_671_shape_cast_fp16 = shape(x = linear_33_cast_fp16)[name = string("op_671_shape_cast_fp16")];
+            int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)];
+            int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)];
+            bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)];
+            string var_671_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_671_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_671_shape_cast_fp16_to_uint16 = cast(dtype = var_671_shape_cast_fp16_to_uint16_dtype_0, x = var_671_shape_cast_fp16)[name = string("cast_85")];
+            uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_671_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")];
+            string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_143_axes_0 = const()[name = string("expand_dims_143_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_84")];
+            tensor<int32, [1]> expand_dims_143 = expand_dims(axes = expand_dims_143_axes_0, x = gather_33_cast_uint16_to_int32)[name = string("expand_dims_143")];
+            tensor<int32, [4]> concat_104 = const()[name = string("concat_104"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [1]> concat_105_values0_0 = const()[name = string("concat_105_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_105_values1_0 = const()[name = string("concat_105_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_105_values3_0 = const()[name = string("concat_105_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_105_axis_0 = const()[name = string("concat_105_axis_0"), val = int32(0)];
+            bool concat_105_interleave_0 = const()[name = string("concat_105_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_105 = concat(axis = concat_105_axis_0, interleave = concat_105_interleave_0, values = (concat_105_values0_0, concat_105_values1_0, expand_dims_143, concat_105_values3_0))[name = string("concat_105")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_104, begin_mask = v_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_105, end_mask = v_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_17_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_83)[name = string("v_cache2_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_17_cast_fp16, input = v_cache2)[name = string("coreml_update_state_85_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_85 = read_state(input = v_cache2)[name = string("coreml_update_state_85")];
+            tensor<fp16, [1024, 1024]> var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93363584)))];
+            tensor<fp16, [1, ?, 1024]> linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_693_to_fp16, x = audio_data)[name = string("linear_34_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_697_to_fp16 = const()[name = string("op_697_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95460800)))];
+            tensor<fp16, [1024]> var_698_to_fp16 = const()[name = string("op_698_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97558016)))];
+            tensor<fp16, [1, ?, 1024]> linear_35_cast_fp16 = linear(bias = var_698_to_fp16, weight = var_697_to_fp16, x = audio_data)[name = string("linear_35_cast_fp16")];
+            tensor<int32, [3]> var_700_shape_cast_fp16 = shape(x = linear_34_cast_fp16)[name = string("op_700_shape_cast_fp16")];
+            int32 gather_34_axis_0 = const()[name = string("gather_34_axis_0"), val = int32(0)];
+            int32 gather_34_batch_dims_0 = const()[name = string("gather_34_batch_dims_0"), val = int32(0)];
+            bool gather_34_validate_indices_0 = const()[name = string("gather_34_validate_indices_0"), val = bool(false)];
+            string var_700_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_700_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_34_to_uint16 = const()[name = string("select_34_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_700_shape_cast_fp16_to_uint16 = cast(dtype = var_700_shape_cast_fp16_to_uint16_dtype_0, x = var_700_shape_cast_fp16)[name = string("cast_83")];
+            uint16 gather_34_cast_uint16 = gather(axis = gather_34_axis_0, batch_dims = gather_34_batch_dims_0, indices = select_34_to_uint16, validate_indices = gather_34_validate_indices_0, x = var_700_shape_cast_fp16_to_uint16)[name = string("gather_34_cast_uint16")];
+            string gather_34_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_34_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_34_cast_uint16_to_int32 = cast(dtype = gather_34_cast_uint16_to_int32_dtype_0, x = gather_34_cast_uint16)[name = string("cast_82")];
+            tensor<int32, [1]> expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = gather_34_cast_uint16_to_int32)[name = string("expand_dims_147")];
+            tensor<int32, [4]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [1]> concat_108_values0_0 = const()[name = string("concat_108_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)];
+            bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (concat_108_values0_0, concat_108_values1_0, expand_dims_147, concat_108_values3_0))[name = string("concat_108")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_107, begin_mask = k_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_108, end_mask = k_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_18_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_84)[name = string("k_cache2_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_18_cast_fp16, input = k_cache2)[name = string("coreml_update_state_86_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_86 = read_state(input = k_cache2)[name = string("coreml_update_state_86")];
+            tensor<int32, [3]> var_705_shape_cast_fp16 = shape(x = linear_35_cast_fp16)[name = string("op_705_shape_cast_fp16")];
+            int32 gather_35_axis_0 = const()[name = string("gather_35_axis_0"), val = int32(0)];
+            int32 gather_35_batch_dims_0 = const()[name = string("gather_35_batch_dims_0"), val = int32(0)];
+            bool gather_35_validate_indices_0 = const()[name = string("gather_35_validate_indices_0"), val = bool(false)];
+            string var_705_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_705_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_35_to_uint16 = const()[name = string("select_35_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_705_shape_cast_fp16_to_uint16 = cast(dtype = var_705_shape_cast_fp16_to_uint16_dtype_0, x = var_705_shape_cast_fp16)[name = string("cast_81")];
+            uint16 gather_35_cast_uint16 = gather(axis = gather_35_axis_0, batch_dims = gather_35_batch_dims_0, indices = select_35_to_uint16, validate_indices = gather_35_validate_indices_0, x = var_705_shape_cast_fp16_to_uint16)[name = string("gather_35_cast_uint16")];
+            string gather_35_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_35_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_151_axes_0 = const()[name = string("expand_dims_151_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_35_cast_uint16_to_int32 = cast(dtype = gather_35_cast_uint16_to_int32_dtype_0, x = gather_35_cast_uint16)[name = string("cast_80")];
+            tensor<int32, [1]> expand_dims_151 = expand_dims(axes = expand_dims_151_axes_0, x = gather_35_cast_uint16_to_int32)[name = string("expand_dims_151")];
+            tensor<int32, [4]> concat_110 = const()[name = string("concat_110"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [1]> concat_111_values0_0 = const()[name = string("concat_111_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)];
+            bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (concat_111_values0_0, concat_111_values1_0, expand_dims_151, concat_111_values3_0))[name = string("concat_111")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_110, begin_mask = v_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_111, end_mask = v_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_18_stride_0, update = linear_35_cast_fp16, x = coreml_update_state_85)[name = string("v_cache2_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_18_cast_fp16, input = v_cache2)[name = string("coreml_update_state_87_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_87 = read_state(input = v_cache2)[name = string("coreml_update_state_87")];
+            tensor<fp16, [1024, 1024]> var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97560128)))];
+            tensor<fp16, [1, ?, 1024]> linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_727_to_fp16, x = audio_data)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_731_to_fp16 = const()[name = string("op_731_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99657344)))];
+            tensor<fp16, [1024]> var_732_to_fp16 = const()[name = string("op_732_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101754560)))];
+            tensor<fp16, [1, ?, 1024]> linear_37_cast_fp16 = linear(bias = var_732_to_fp16, weight = var_731_to_fp16, x = audio_data)[name = string("linear_37_cast_fp16")];
+            tensor<int32, [3]> var_734_shape_cast_fp16 = shape(x = linear_36_cast_fp16)[name = string("op_734_shape_cast_fp16")];
+            int32 gather_36_axis_0 = const()[name = string("gather_36_axis_0"), val = int32(0)];
+            int32 gather_36_batch_dims_0 = const()[name = string("gather_36_batch_dims_0"), val = int32(0)];
+            bool gather_36_validate_indices_0 = const()[name = string("gather_36_validate_indices_0"), val = bool(false)];
+            string var_734_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_734_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_36_to_uint16 = const()[name = string("select_36_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_734_shape_cast_fp16_to_uint16 = cast(dtype = var_734_shape_cast_fp16_to_uint16_dtype_0, x = var_734_shape_cast_fp16)[name = string("cast_79")];
+            uint16 gather_36_cast_uint16 = gather(axis = gather_36_axis_0, batch_dims = gather_36_batch_dims_0, indices = select_36_to_uint16, validate_indices = gather_36_validate_indices_0, x = var_734_shape_cast_fp16_to_uint16)[name = string("gather_36_cast_uint16")];
+            string gather_36_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_36_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_155_axes_0 = const()[name = string("expand_dims_155_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_36_cast_uint16_to_int32 = cast(dtype = gather_36_cast_uint16_to_int32_dtype_0, x = gather_36_cast_uint16)[name = string("cast_78")];
+            tensor<int32, [1]> expand_dims_155 = expand_dims(axes = expand_dims_155_axes_0, x = gather_36_cast_uint16_to_int32)[name = string("expand_dims_155")];
+            tensor<int32, [4]> concat_113 = const()[name = string("concat_113"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [1]> concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_114_values1_0 = const()[name = string("concat_114_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_114_values3_0 = const()[name = string("concat_114_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
+            bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, concat_114_values1_0, expand_dims_155, concat_114_values3_0))[name = string("concat_114")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_113, begin_mask = k_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_114, end_mask = k_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_19_stride_0, update = linear_36_cast_fp16, x = coreml_update_state_86)[name = string("k_cache2_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_19_cast_fp16, input = k_cache2)[name = string("coreml_update_state_88_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_88 = read_state(input = k_cache2)[name = string("coreml_update_state_88")];
+            tensor<int32, [3]> var_739_shape_cast_fp16 = shape(x = linear_37_cast_fp16)[name = string("op_739_shape_cast_fp16")];
+            int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)];
+            int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)];
+            bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)];
+            string var_739_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_739_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_739_shape_cast_fp16_to_uint16 = cast(dtype = var_739_shape_cast_fp16_to_uint16_dtype_0, x = var_739_shape_cast_fp16)[name = string("cast_77")];
+            uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_739_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")];
+            string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_159_axes_0 = const()[name = string("expand_dims_159_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_76")];
+            tensor<int32, [1]> expand_dims_159 = expand_dims(axes = expand_dims_159_axes_0, x = gather_37_cast_uint16_to_int32)[name = string("expand_dims_159")];
+            tensor<int32, [4]> concat_116 = const()[name = string("concat_116"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [1]> concat_117_values0_0 = const()[name = string("concat_117_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_117_values1_0 = const()[name = string("concat_117_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_117_values3_0 = const()[name = string("concat_117_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_117_axis_0 = const()[name = string("concat_117_axis_0"), val = int32(0)];
+            bool concat_117_interleave_0 = const()[name = string("concat_117_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_117 = concat(axis = concat_117_axis_0, interleave = concat_117_interleave_0, values = (concat_117_values0_0, concat_117_values1_0, expand_dims_159, concat_117_values3_0))[name = string("concat_117")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_116, begin_mask = v_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_117, end_mask = v_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_19_stride_0, update = linear_37_cast_fp16, x = coreml_update_state_87)[name = string("v_cache2_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_19_cast_fp16, input = v_cache2)[name = string("coreml_update_state_89_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_89 = read_state(input = v_cache2)[name = string("coreml_update_state_89")];
+            tensor<fp16, [1024, 1024]> var_761_to_fp16 = const()[name = string("op_761_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101756672)))];
+            tensor<fp16, [1, ?, 1024]> linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_761_to_fp16, x = audio_data)[name = string("linear_38_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_765_to_fp16 = const()[name = string("op_765_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103853888)))];
+            tensor<fp16, [1024]> var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105951104)))];
+            tensor<fp16, [1, ?, 1024]> linear_39_cast_fp16 = linear(bias = var_766_to_fp16, weight = var_765_to_fp16, x = audio_data)[name = string("linear_39_cast_fp16")];
+            tensor<int32, [3]> var_768_shape_cast_fp16 = shape(x = linear_38_cast_fp16)[name = string("op_768_shape_cast_fp16")];
+            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
+            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
+            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
+            string var_768_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_768_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_768_shape_cast_fp16_to_uint16 = cast(dtype = var_768_shape_cast_fp16_to_uint16_dtype_0, x = var_768_shape_cast_fp16)[name = string("cast_75")];
+            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_768_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
+            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_74")];
+            tensor<int32, [1]> expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = gather_38_cast_uint16_to_int32)[name = string("expand_dims_163")];
+            tensor<int32, [4]> concat_119 = const()[name = string("concat_119"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [1]> concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_120_values1_0 = const()[name = string("concat_120_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_120_values3_0 = const()[name = string("concat_120_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)];
+            bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, concat_120_values1_0, expand_dims_163, concat_120_values3_0))[name = string("concat_120")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_119, begin_mask = k_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_120, end_mask = k_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_20_stride_0, update = linear_38_cast_fp16, x = coreml_update_state_88)[name = string("k_cache2_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_20_cast_fp16, input = k_cache2)[name = string("coreml_update_state_90_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_90 = read_state(input = k_cache2)[name = string("coreml_update_state_90")];
+            tensor<int32, [3]> var_773_shape_cast_fp16 = shape(x = linear_39_cast_fp16)[name = string("op_773_shape_cast_fp16")];
+            int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)];
+            int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)];
+            bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)];
+            string var_773_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_773_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_773_shape_cast_fp16_to_uint16 = cast(dtype = var_773_shape_cast_fp16_to_uint16_dtype_0, x = var_773_shape_cast_fp16)[name = string("cast_73")];
+            uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_773_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")];
+            string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_167_axes_0 = const()[name = string("expand_dims_167_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_72")];
+            tensor<int32, [1]> expand_dims_167 = expand_dims(axes = expand_dims_167_axes_0, x = gather_39_cast_uint16_to_int32)[name = string("expand_dims_167")];
+            tensor<int32, [4]> concat_122 = const()[name = string("concat_122"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [1]> concat_123_values0_0 = const()[name = string("concat_123_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)];
+            bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (concat_123_values0_0, concat_123_values1_0, expand_dims_167, concat_123_values3_0))[name = string("concat_123")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_122, begin_mask = v_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_123, end_mask = v_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_20_stride_0, update = linear_39_cast_fp16, x = coreml_update_state_89)[name = string("v_cache2_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_20_cast_fp16, input = v_cache2)[name = string("coreml_update_state_91_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_91 = read_state(input = v_cache2)[name = string("coreml_update_state_91")];
+            tensor<fp16, [1024, 1024]> var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105953216)))];
+            tensor<fp16, [1, ?, 1024]> linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_795_to_fp16, x = audio_data)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_799_to_fp16 = const()[name = string("op_799_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108050432)))];
+            tensor<fp16, [1024]> var_800_to_fp16 = const()[name = string("op_800_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110147648)))];
+            tensor<fp16, [1, ?, 1024]> linear_41_cast_fp16 = linear(bias = var_800_to_fp16, weight = var_799_to_fp16, x = audio_data)[name = string("linear_41_cast_fp16")];
+            tensor<int32, [3]> var_802_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_802_shape_cast_fp16")];
+            int32 gather_40_axis_0 = const()[name = string("gather_40_axis_0"), val = int32(0)];
+            int32 gather_40_batch_dims_0 = const()[name = string("gather_40_batch_dims_0"), val = int32(0)];
+            bool gather_40_validate_indices_0 = const()[name = string("gather_40_validate_indices_0"), val = bool(false)];
+            string var_802_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_802_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_40_to_uint16 = const()[name = string("select_40_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_802_shape_cast_fp16_to_uint16 = cast(dtype = var_802_shape_cast_fp16_to_uint16_dtype_0, x = var_802_shape_cast_fp16)[name = string("cast_71")];
+            uint16 gather_40_cast_uint16 = gather(axis = gather_40_axis_0, batch_dims = gather_40_batch_dims_0, indices = select_40_to_uint16, validate_indices = gather_40_validate_indices_0, x = var_802_shape_cast_fp16_to_uint16)[name = string("gather_40_cast_uint16")];
+            string gather_40_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_40_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_171_axes_0 = const()[name = string("expand_dims_171_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_40_cast_uint16_to_int32 = cast(dtype = gather_40_cast_uint16_to_int32_dtype_0, x = gather_40_cast_uint16)[name = string("cast_70")];
+            tensor<int32, [1]> expand_dims_171 = expand_dims(axes = expand_dims_171_axes_0, x = gather_40_cast_uint16_to_int32)[name = string("expand_dims_171")];
+            tensor<int32, [4]> concat_125 = const()[name = string("concat_125"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [1]> concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_126_values1_0 = const()[name = string("concat_126_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_126_values3_0 = const()[name = string("concat_126_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)];
+            bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, concat_126_values1_0, expand_dims_171, concat_126_values3_0))[name = string("concat_126")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_125, begin_mask = k_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_126, end_mask = k_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_21_stride_0, update = linear_40_cast_fp16, x = coreml_update_state_90)[name = string("k_cache2_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_21_cast_fp16, input = k_cache2)[name = string("coreml_update_state_92_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_92 = read_state(input = k_cache2)[name = string("coreml_update_state_92")];
+            tensor<int32, [3]> var_807_shape_cast_fp16 = shape(x = linear_41_cast_fp16)[name = string("op_807_shape_cast_fp16")];
+            int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)];
+            int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)];
+            bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)];
+            string var_807_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_807_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_807_shape_cast_fp16_to_uint16 = cast(dtype = var_807_shape_cast_fp16_to_uint16_dtype_0, x = var_807_shape_cast_fp16)[name = string("cast_69")];
+            uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_807_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")];
+            string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_175_axes_0 = const()[name = string("expand_dims_175_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_68")];
+            tensor<int32, [1]> expand_dims_175 = expand_dims(axes = expand_dims_175_axes_0, x = gather_41_cast_uint16_to_int32)[name = string("expand_dims_175")];
+            tensor<int32, [4]> concat_128 = const()[name = string("concat_128"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [1]> concat_129_values0_0 = const()[name = string("concat_129_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)];
+            bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (concat_129_values0_0, concat_129_values1_0, expand_dims_175, concat_129_values3_0))[name = string("concat_129")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_128, begin_mask = v_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_129, end_mask = v_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_21_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_91)[name = string("v_cache2_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_21_cast_fp16, input = v_cache2)[name = string("coreml_update_state_93_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_93 = read_state(input = v_cache2)[name = string("coreml_update_state_93")];
+            tensor<fp16, [1024, 1024]> var_829_to_fp16 = const()[name = string("op_829_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110149760)))];
+            tensor<fp16, [1, ?, 1024]> linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_829_to_fp16, x = audio_data)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112246976)))];
+            tensor<fp16, [1024]> var_834_to_fp16 = const()[name = string("op_834_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114344192)))];
+            tensor<fp16, [1, ?, 1024]> linear_43_cast_fp16 = linear(bias = var_834_to_fp16, weight = var_833_to_fp16, x = audio_data)[name = string("linear_43_cast_fp16")];
+            tensor<int32, [3]> var_836_shape_cast_fp16 = shape(x = linear_42_cast_fp16)[name = string("op_836_shape_cast_fp16")];
+            int32 gather_42_axis_0 = const()[name = string("gather_42_axis_0"), val = int32(0)];
+            int32 gather_42_batch_dims_0 = const()[name = string("gather_42_batch_dims_0"), val = int32(0)];
+            bool gather_42_validate_indices_0 = const()[name = string("gather_42_validate_indices_0"), val = bool(false)];
+            string var_836_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_836_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_42_to_uint16 = const()[name = string("select_42_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_836_shape_cast_fp16_to_uint16 = cast(dtype = var_836_shape_cast_fp16_to_uint16_dtype_0, x = var_836_shape_cast_fp16)[name = string("cast_67")];
+            uint16 gather_42_cast_uint16 = gather(axis = gather_42_axis_0, batch_dims = gather_42_batch_dims_0, indices = select_42_to_uint16, validate_indices = gather_42_validate_indices_0, x = var_836_shape_cast_fp16_to_uint16)[name = string("gather_42_cast_uint16")];
+            string gather_42_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_42_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_42_cast_uint16_to_int32 = cast(dtype = gather_42_cast_uint16_to_int32_dtype_0, x = gather_42_cast_uint16)[name = string("cast_66")];
+            tensor<int32, [1]> expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = gather_42_cast_uint16_to_int32)[name = string("expand_dims_179")];
+            tensor<int32, [4]> concat_131 = const()[name = string("concat_131"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [1]> concat_132_values0_0 = const()[name = string("concat_132_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)];
+            bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (concat_132_values0_0, concat_132_values1_0, expand_dims_179, concat_132_values3_0))[name = string("concat_132")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_131, begin_mask = k_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_132, end_mask = k_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_22_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_92)[name = string("k_cache2_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_22_cast_fp16, input = k_cache2)[name = string("coreml_update_state_94_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_94 = read_state(input = k_cache2)[name = string("coreml_update_state_94")];
+            tensor<int32, [3]> var_841_shape_cast_fp16 = shape(x = linear_43_cast_fp16)[name = string("op_841_shape_cast_fp16")];
+            int32 gather_43_axis_0 = const()[name = string("gather_43_axis_0"), val = int32(0)];
+            int32 gather_43_batch_dims_0 = const()[name = string("gather_43_batch_dims_0"), val = int32(0)];
+            bool gather_43_validate_indices_0 = const()[name = string("gather_43_validate_indices_0"), val = bool(false)];
+            string var_841_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_841_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_43_to_uint16 = const()[name = string("select_43_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_841_shape_cast_fp16_to_uint16 = cast(dtype = var_841_shape_cast_fp16_to_uint16_dtype_0, x = var_841_shape_cast_fp16)[name = string("cast_65")];
+            uint16 gather_43_cast_uint16 = gather(axis = gather_43_axis_0, batch_dims = gather_43_batch_dims_0, indices = select_43_to_uint16, validate_indices = gather_43_validate_indices_0, x = var_841_shape_cast_fp16_to_uint16)[name = string("gather_43_cast_uint16")];
+            string gather_43_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_43_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_183_axes_0 = const()[name = string("expand_dims_183_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_43_cast_uint16_to_int32 = cast(dtype = gather_43_cast_uint16_to_int32_dtype_0, x = gather_43_cast_uint16)[name = string("cast_64")];
+            tensor<int32, [1]> expand_dims_183 = expand_dims(axes = expand_dims_183_axes_0, x = gather_43_cast_uint16_to_int32)[name = string("expand_dims_183")];
+            tensor<int32, [4]> concat_134 = const()[name = string("concat_134"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [1]> concat_135_values0_0 = const()[name = string("concat_135_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)];
+            bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (concat_135_values0_0, concat_135_values1_0, expand_dims_183, concat_135_values3_0))[name = string("concat_135")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_134, begin_mask = v_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_135, end_mask = v_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_22_stride_0, update = linear_43_cast_fp16, x = coreml_update_state_93)[name = string("v_cache2_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_22_cast_fp16, input = v_cache2)[name = string("coreml_update_state_95_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_95 = read_state(input = v_cache2)[name = string("coreml_update_state_95")];
+            tensor<fp16, [1024, 1024]> var_863_to_fp16 = const()[name = string("op_863_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114346304)))];
+            tensor<fp16, [1, ?, 1024]> linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_863_to_fp16, x = audio_data)[name = string("linear_44_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_867_to_fp16 = const()[name = string("op_867_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116443520)))];
+            tensor<fp16, [1024]> var_868_to_fp16 = const()[name = string("op_868_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118540736)))];
+            tensor<fp16, [1, ?, 1024]> linear_45_cast_fp16 = linear(bias = var_868_to_fp16, weight = var_867_to_fp16, x = audio_data)[name = string("linear_45_cast_fp16")];
+            tensor<int32, [3]> var_870_shape_cast_fp16 = shape(x = linear_44_cast_fp16)[name = string("op_870_shape_cast_fp16")];
+            int32 gather_44_axis_0 = const()[name = string("gather_44_axis_0"), val = int32(0)];
+            int32 gather_44_batch_dims_0 = const()[name = string("gather_44_batch_dims_0"), val = int32(0)];
+            bool gather_44_validate_indices_0 = const()[name = string("gather_44_validate_indices_0"), val = bool(false)];
+            string var_870_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_870_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_44_to_uint16 = const()[name = string("select_44_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_870_shape_cast_fp16_to_uint16 = cast(dtype = var_870_shape_cast_fp16_to_uint16_dtype_0, x = var_870_shape_cast_fp16)[name = string("cast_63")];
+            uint16 gather_44_cast_uint16 = gather(axis = gather_44_axis_0, batch_dims = gather_44_batch_dims_0, indices = select_44_to_uint16, validate_indices = gather_44_validate_indices_0, x = var_870_shape_cast_fp16_to_uint16)[name = string("gather_44_cast_uint16")];
+            string gather_44_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_44_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_187_axes_0 = const()[name = string("expand_dims_187_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_44_cast_uint16_to_int32 = cast(dtype = gather_44_cast_uint16_to_int32_dtype_0, x = gather_44_cast_uint16)[name = string("cast_62")];
+            tensor<int32, [1]> expand_dims_187 = expand_dims(axes = expand_dims_187_axes_0, x = gather_44_cast_uint16_to_int32)[name = string("expand_dims_187")];
+            tensor<int32, [4]> concat_137 = const()[name = string("concat_137"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [1]> concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_138_values1_0 = const()[name = string("concat_138_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_138_values3_0 = const()[name = string("concat_138_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)];
+            bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, concat_138_values1_0, expand_dims_187, concat_138_values3_0))[name = string("concat_138")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_137, begin_mask = k_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_138, end_mask = k_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_23_stride_0, update = linear_44_cast_fp16, x = coreml_update_state_94)[name = string("k_cache2_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_23_cast_fp16, input = k_cache2)[name = string("coreml_update_state_96_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_96 = read_state(input = k_cache2)[name = string("coreml_update_state_96")];
+            tensor<int32, [3]> var_875_shape_cast_fp16 = shape(x = linear_45_cast_fp16)[name = string("op_875_shape_cast_fp16")];
+            int32 gather_45_axis_0 = const()[name = string("gather_45_axis_0"), val = int32(0)];
+            int32 gather_45_batch_dims_0 = const()[name = string("gather_45_batch_dims_0"), val = int32(0)];
+            bool gather_45_validate_indices_0 = const()[name = string("gather_45_validate_indices_0"), val = bool(false)];
+            string var_875_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_875_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_45_to_uint16 = const()[name = string("select_45_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_875_shape_cast_fp16_to_uint16 = cast(dtype = var_875_shape_cast_fp16_to_uint16_dtype_0, x = var_875_shape_cast_fp16)[name = string("cast_61")];
+            uint16 gather_45_cast_uint16 = gather(axis = gather_45_axis_0, batch_dims = gather_45_batch_dims_0, indices = select_45_to_uint16, validate_indices = gather_45_validate_indices_0, x = var_875_shape_cast_fp16_to_uint16)[name = string("gather_45_cast_uint16")];
+            string gather_45_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_45_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_191_axes_0 = const()[name = string("expand_dims_191_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_45_cast_uint16_to_int32 = cast(dtype = gather_45_cast_uint16_to_int32_dtype_0, x = gather_45_cast_uint16)[name = string("cast_60")];
+            tensor<int32, [1]> expand_dims_191 = expand_dims(axes = expand_dims_191_axes_0, x = gather_45_cast_uint16_to_int32)[name = string("expand_dims_191")];
+            tensor<int32, [4]> concat_140 = const()[name = string("concat_140"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [1]> concat_141_values0_0 = const()[name = string("concat_141_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_141_values1_0 = const()[name = string("concat_141_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_141_values3_0 = const()[name = string("concat_141_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_141_axis_0 = const()[name = string("concat_141_axis_0"), val = int32(0)];
+            bool concat_141_interleave_0 = const()[name = string("concat_141_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_141 = concat(axis = concat_141_axis_0, interleave = concat_141_interleave_0, values = (concat_141_values0_0, concat_141_values1_0, expand_dims_191, concat_141_values3_0))[name = string("concat_141")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_140, begin_mask = v_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_141, end_mask = v_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_23_stride_0, update = linear_45_cast_fp16, x = coreml_update_state_95)[name = string("v_cache2_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_23_cast_fp16, input = v_cache2)[name = string("coreml_update_state_97_write_state")];
+            tensor<fp16, [24, 1, 1500, 1024]> coreml_update_state_97 = read_state(input = v_cache2)[name = string("coreml_update_state_97")];
+            tensor<fp16, [1024, 1024]> var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118542848)))];
+            tensor<fp16, [1, ?, 1024]> linear_46_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_897_to_fp16, x = audio_data)[name = string("linear_46_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120640064)))];
+            tensor<fp16, [1024]> var_902_to_fp16 = const()[name = string("op_902_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122737280)))];
+            tensor<fp16, [1, ?, 1024]> linear_47_cast_fp16 = linear(bias = var_902_to_fp16, weight = var_901_to_fp16, x = audio_data)[name = string("linear_47_cast_fp16")];
+            tensor<int32, [3]> var_904_shape_cast_fp16 = shape(x = linear_46_cast_fp16)[name = string("op_904_shape_cast_fp16")];
+            int32 gather_46_axis_0 = const()[name = string("gather_46_axis_0"), val = int32(0)];
+            int32 gather_46_batch_dims_0 = const()[name = string("gather_46_batch_dims_0"), val = int32(0)];
+            bool gather_46_validate_indices_0 = const()[name = string("gather_46_validate_indices_0"), val = bool(false)];
+            string var_904_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_904_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_46_to_uint16 = const()[name = string("select_46_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_904_shape_cast_fp16_to_uint16 = cast(dtype = var_904_shape_cast_fp16_to_uint16_dtype_0, x = var_904_shape_cast_fp16)[name = string("cast_59")];
+            uint16 gather_46_cast_uint16 = gather(axis = gather_46_axis_0, batch_dims = gather_46_batch_dims_0, indices = select_46_to_uint16, validate_indices = gather_46_validate_indices_0, x = var_904_shape_cast_fp16_to_uint16)[name = string("gather_46_cast_uint16")];
+            string gather_46_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_46_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_46_cast_uint16_to_int32 = cast(dtype = gather_46_cast_uint16_to_int32_dtype_0, x = gather_46_cast_uint16)[name = string("cast_58")];
+            tensor<int32, [1]> expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = gather_46_cast_uint16_to_int32)[name = string("expand_dims_195")];
+            tensor<int32, [4]> concat_143 = const()[name = string("concat_143"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [1]> concat_144_values0_0 = const()[name = string("concat_144_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_144_values1_0 = const()[name = string("concat_144_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_144_values3_0 = const()[name = string("concat_144_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_144_axis_0 = const()[name = string("concat_144_axis_0"), val = int32(0)];
+            bool concat_144_interleave_0 = const()[name = string("concat_144_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_144 = concat(axis = concat_144_axis_0, interleave = concat_144_interleave_0, values = (concat_144_values0_0, concat_144_values1_0, expand_dims_195, concat_144_values3_0))[name = string("concat_144")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> k_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_143, begin_mask = k_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_144, end_mask = k_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_24_stride_0, update = linear_46_cast_fp16, x = coreml_update_state_96)[name = string("k_cache2_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_24_cast_fp16, input = k_cache2)[name = string("coreml_update_state_98_write_state")];
+            tensor<int32, [3]> var_909_shape_cast_fp16 = shape(x = linear_47_cast_fp16)[name = string("op_909_shape_cast_fp16")];
+            int32 gather_47_axis_0 = const()[name = string("gather_47_axis_0"), val = int32(0)];
+            int32 gather_47_batch_dims_0 = const()[name = string("gather_47_batch_dims_0"), val = int32(0)];
+            bool gather_47_validate_indices_0 = const()[name = string("gather_47_validate_indices_0"), val = bool(false)];
+            string var_909_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_909_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_47_to_uint16 = const()[name = string("select_47_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_909_shape_cast_fp16_to_uint16 = cast(dtype = var_909_shape_cast_fp16_to_uint16_dtype_0, x = var_909_shape_cast_fp16)[name = string("cast_57")];
+            uint16 gather_47_cast_uint16 = gather(axis = gather_47_axis_0, batch_dims = gather_47_batch_dims_0, indices = select_47_to_uint16, validate_indices = gather_47_validate_indices_0, x = var_909_shape_cast_fp16_to_uint16)[name = string("gather_47_cast_uint16")];
+            string gather_47_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_47_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_199_axes_0 = const()[name = string("expand_dims_199_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_47_cast_uint16_to_int32 = cast(dtype = gather_47_cast_uint16_to_int32_dtype_0, x = gather_47_cast_uint16)[name = string("cast_56")];
+            tensor<int32, [1]> expand_dims_199 = expand_dims(axes = expand_dims_199_axes_0, x = gather_47_cast_uint16_to_int32)[name = string("expand_dims_199")];
+            tensor<int32, [4]> concat_146 = const()[name = string("concat_146"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [1]> concat_147_values0_0 = const()[name = string("concat_147_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)];
+            bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (concat_147_values0_0, concat_147_values1_0, expand_dims_199, concat_147_values3_0))[name = string("concat_147")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 1500, 1024]> v_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_146, begin_mask = v_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_147, end_mask = v_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_24_stride_0, update = linear_47_cast_fp16, x = coreml_update_state_97)[name = string("v_cache2_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_24_cast_fp16, input = v_cache2)[name = string("coreml_update_state_99_write_state")];
+        } -> (dummy);
+}
\ No newline at end of file
diff --git a/medium/decoder_first.mlmodelc/weights/weight.bin b/medium/decoder_first.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c3560eb93dcc3d2a99a37ff03a423674a7a91bb3
--- /dev/null
+++ b/medium/decoder_first.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:770db5fbf7afe2a5ce6088c1627c9603a75521b8a4837407cabb6376e82f72e8
+size 122739392
diff --git a/medium/decoder_second.mlmodelc/analytics/coremldata.bin b/medium/decoder_second.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d524f2e627d050bc8b5c74fe58764bfc5d7924f6
--- /dev/null
+++ b/medium/decoder_second.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c52f37c878809dca96af63fc4747def6ddfc186e51d71ca0f84e8dd484c3db4
+size 243
diff --git a/medium/decoder_second.mlmodelc/coremldata.bin b/medium/decoder_second.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0af718ba0162adfaaeb89faa081b5174a0cdeb8f
--- /dev/null
+++ b/medium/decoder_second.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:250736439ebc0224e5d43bc2ea92855a070857d494c8bd3c4cfd92a2f4dc6985
+size 487
diff --git a/medium/decoder_second.mlmodelc/metadata.json b/medium/decoder_second.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..d216bac534168e7860b10485c727ee9714477199
--- /dev/null
+++ b/medium/decoder_second.mlmodelc/metadata.json
@@ -0,0 +1,127 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.linear" : 193,
+      "Ios18.readState" : 50,
+      "Ios18.expandDims" : 25,
+      "Ios18.sub" : 1,
+      "Ios18.matmul" : 96,
+      "Ios18.gelu" : 24,
+      "Ios18.gather" : 27,
+      "Ios18.concat" : 122,
+      "Shape" : 26,
+      "Ios18.add" : 121,
+      "Ios18.sliceUpdate" : 96,
+      "Ios18.sliceByIndex" : 193,
+      "Ios18.layerNorm" : 73,
+      "Ios18.cast" : 52,
+      "Ios18.transpose" : 192,
+      "Ios18.writeState" : 48,
+      "Ios18.reshape" : 192,
+      "Ios18.softmax" : 48,
+      "Ios18.mul" : 96
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 24 × 1 × 448 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[24, 1, 448, 1024]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 24 × 1 × 448 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[24, 1, 448, 1024]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 24 × 1 × 1500 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[24, 1, 1500, 1024]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 24 × 1 × 1500 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[24, 1, 1500, 1024]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Int32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Int32 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "token_data",
+        "shortDescription" : ""
+      },
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Float16 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "offset_mask",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_second",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/medium/decoder_second.mlmodelc/model.mil b/medium/decoder_second.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..cd3d6fcbe379a4bc54b96124dd582600a3439cf0
--- /dev/null
+++ b/medium/decoder_second.mlmodelc/model.mil
@@ -0,0 +1,4738 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(state<tensor<fp16, [24, 1, 448, 1024]>> k_cache1, state<tensor<fp16, [24, 1, 1500, 1024]>> k_cache2, tensor<fp16, [1, ?]> offset_mask, tensor<int32, [1, ?]> token_data, state<tensor<fp16, [24, 1, 448, 1024]>> v_cache1, state<tensor<fp16, [24, 1, 1500, 1024]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] {
+            tensor<int32, [2]> var_62_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_62_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_62_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_62_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [2]> var_62_shape_cast_fp16_to_int16 = cast(dtype = var_62_shape_cast_fp16_to_int16_dtype_0, x = var_62_shape_cast_fp16)[name = string("cast_298")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_62_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [2]> var_66_shape = shape(x = token_data)[name = string("op_66_shape")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_66_shape_to_uint16_dtype_0 = const()[name = string("op_66_shape_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [2]> var_66_shape_to_uint16 = cast(dtype = var_66_shape_to_uint16_dtype_0, x = var_66_shape)[name = string("cast_296")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_66_shape_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_295")];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_297")];
+            int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")];
+            int32 var_122_axis_0 = const()[name = string("op_122_axis_0"), val = int32(0)];
+            int32 var_122_batch_dims_0 = const()[name = string("op_122_batch_dims_0"), val = int32(0)];
+            bool var_122_validate_indices_0 = const()[name = string("op_122_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [51865, 1024]> token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor<fp16, [51865, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, ?, 1024]> var_122_cast_fp16 = gather(axis = var_122_axis_0, batch_dims = var_122_batch_dims_0, indices = token_data, validate_indices = var_122_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_122_cast_fp16")];
+            int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)];
+            int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)];
+            bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")];
+            int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(1024)];
+            int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)];
+            bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")];
+            tensor<bool, [2]> var_125_end_mask_0 = const()[name = string("op_125_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 1024]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [448, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106219648)))];
+            tensor<fp16, [?, ?]> var_125_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_125_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_125_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_3_cast_fp16 = add(x = var_122_cast_fp16, y = var_125_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<fp16, [24, 1, 448, 1024]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")];
+            tensor<fp16, [24, 1, 448, 1024]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")];
+            tensor<fp16, [24, 1, 1500, 1024]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<int32, [4]> k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")];
+            tensor<fp16, [24, 1, 1500, 1024]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<int32, [4]> v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")];
+            int32 var_148 = const()[name = string("op_148"), val = int32(-1)];
+            tensor<int32, [1]> var_166_axes_0 = const()[name = string("op_166_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107137216)))];
+            tensor<fp16, [1024]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107139328)))];
+            fp16 var_154_to_fp16 = const()[name = string("op_154_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_166_cast_fp16 = layer_norm(axes = var_166_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_154_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_166_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_177_to_fp16 = const()[name = string("op_177_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107141440)))];
+            tensor<fp16, [1024]> var_178_to_fp16 = const()[name = string("op_178_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109238656)))];
+            tensor<fp16, [1, ?, 1024]> linear_0_cast_fp16 = linear(bias = var_178_to_fp16, weight = var_177_to_fp16, x = var_166_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_181_to_fp16 = const()[name = string("op_181_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109240768)))];
+            tensor<fp16, [1024]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111337984)))];
+            tensor<fp16, [1, ?, 1024]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_181_to_fp16, x = var_166_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_185_to_fp16 = const()[name = string("op_185_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111340096)))];
+            tensor<fp16, [1024]> var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113437312)))];
+            tensor<fp16, [1, ?, 1024]> linear_2_cast_fp16 = linear(bias = var_186_to_fp16, weight = var_185_to_fp16, x = var_166_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [3]> var_188_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_188_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_188_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_188_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_188_shape_cast_fp16_to_uint16 = cast(dtype = var_188_shape_cast_fp16_to_uint16_dtype_0, x = var_188_shape_cast_fp16)[name = string("cast_294")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_188_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_293")];
+            int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")];
+            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")];
+            tensor<int32, [1]> expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")];
+            tensor<int32, [1]> concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)];
+            bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")];
+            tensor<int32, [1]> concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)];
+            bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_48_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_48 = read_state(input = k_cache1)[name = string("coreml_update_state_48")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_49_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_49 = read_state(input = v_cache1)[name = string("coreml_update_state_49")];
+            int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)];
+            int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(1024)];
+            int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
+            bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")];
+            tensor<int32, [3]> var_204_begin_0 = const()[name = string("op_204_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_204_end_mask_0 = const()[name = string("op_204_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_204_cast_fp16 = slice_by_index(begin = var_204_begin_0, end = concat_10, end_mask = var_204_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_204_cast_fp16")];
+            tensor<int32, [3]> var_207_begin_0 = const()[name = string("op_207_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_207_end_mask_0 = const()[name = string("op_207_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = concat_10, end_mask = var_207_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_207_cast_fp16")];
+            tensor<int32, [4]> concat_12x = const()[name = string("concat_12x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_217_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_217_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_120_to_fp16 = const()[name = string("const_120_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_3_cast_fp16 = mul(x = var_217_cast_fp16, y = const_120_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> concat_13x = const()[name = string("concat_13x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_224_cast_fp16 = reshape(shape = concat_13x, x = var_204_cast_fp16)[name = string("op_224_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_121_to_fp16 = const()[name = string("const_121_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_5_cast_fp16 = mul(x = var_224_cast_fp16, y = const_121_to_fp16)[name = string("k_5_cast_fp16")];
+            tensor<int32, [4]> concat_14x = const()[name = string("concat_14x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_231_cast_fp16 = reshape(shape = concat_14x, x = var_207_cast_fp16)[name = string("op_231_cast_fp16")];
+            tensor<int32, [4]> var_232 = const()[name = string("op_232"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_193_perm_0 = const()[name = string("transpose_193_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_194_perm_0 = const()[name = string("transpose_194_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_194 = transpose(perm = transpose_194_perm_0, x = k_5_cast_fp16)[name = string("transpose_478")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_193 = transpose(perm = transpose_193_perm_0, x = q_3_cast_fp16)[name = string("transpose_479")];
+            tensor<fp16, [1, 16, ?, ?]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_193, y = transpose_194)[name = string("qk_1_cast_fp16")];
+            int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")];
+            tensor<int32, [2]> var_235_begin_0 = const()[name = string("op_235_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_235_end_mask_0 = const()[name = string("op_235_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 448]> mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor<fp16, [448, 448]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113439424)))];
+            tensor<fp16, [?, 448]> var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = concat_15, end_mask = var_235_end_mask_0, x = mask_to_fp16)[name = string("op_235_cast_fp16")];
+            int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)];
+            int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)];
+            bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")];
+            tensor<int32, [2]> var_236_begin_0 = const()[name = string("op_236_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_236_end_mask_0 = const()[name = string("op_236_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = concat_16, end_mask = var_236_end_mask_0, x = var_235_cast_fp16)[name = string("op_236_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_236_cast_fp16)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_239_cast_fp16 = softmax(axis = var_148, x = qk_3_cast_fp16)[name = string("op_239_cast_fp16")];
+            bool var_241_transpose_x_0 = const()[name = string("op_241_transpose_x_0"), val = bool(false)];
+            bool var_241_transpose_y_0 = const()[name = string("op_241_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_5_cast_fp16 = transpose(perm = var_232, x = var_231_cast_fp16)[name = string("transpose_480")];
+            tensor<fp16, [1, 16, ?, 64]> var_241_cast_fp16 = matmul(transpose_x = var_241_transpose_x_0, transpose_y = var_241_transpose_y_0, x = var_239_cast_fp16, y = v_5_cast_fp16)[name = string("op_241_cast_fp16")];
+            tensor<int32, [4]> var_242 = const()[name = string("op_242"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_17x = const()[name = string("concat_17x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_243_cast_fp16 = transpose(perm = var_242, x = var_241_cast_fp16)[name = string("transpose_477")];
+            tensor<fp16, [1, ?, 1024]> x_7_cast_fp16 = reshape(shape = concat_17x, x = var_243_cast_fp16)[name = string("x_7_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_247_to_fp16 = const()[name = string("op_247_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113840896)))];
+            tensor<fp16, [1024]> var_248_to_fp16 = const()[name = string("op_248_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115938112)))];
+            tensor<fp16, [1, ?, 1024]> linear_3_cast_fp16 = linear(bias = var_248_to_fp16, weight = var_247_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")];
+            tensor<int32, [1]> var_255_axes_0 = const()[name = string("op_255_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115940224)))];
+            tensor<fp16, [1024]> blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115942336)))];
+            tensor<fp16, [1, ?, 1024]> var_255_cast_fp16 = layer_norm(axes = var_255_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_154_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_255_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115944448)))];
+            tensor<fp16, [1024]> var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118041664)))];
+            tensor<fp16, [1, ?, 1024]> linear_4_cast_fp16 = linear(bias = var_265_to_fp16, weight = var_264_to_fp16, x = var_255_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor<fp16, [1, 1500, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118043776)))];
+            tensor<fp16, [1, 1500, 1024]> k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_22x = const()[name = string("concat_22x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_285_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_285_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_122_to_fp16 = const()[name = string("const_122_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_7_cast_fp16 = mul(x = var_285_cast_fp16, y = const_122_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_291 = const()[name = string("op_291"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_292_cast_fp16 = reshape(shape = var_291, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_292_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_9_cast_fp16 = mul(x = var_292_cast_fp16, y = const_123_to_fp16)[name = string("k_9_cast_fp16")];
+            tensor<int32, [4]> var_298 = const()[name = string("op_298"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_299_cast_fp16 = reshape(shape = var_298, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_299_cast_fp16")];
+            tensor<int32, [4]> var_300 = const()[name = string("op_300"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_195_perm_0 = const()[name = string("transpose_195_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_196_perm_0 = const()[name = string("transpose_196_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_196 = transpose(perm = transpose_196_perm_0, x = k_9_cast_fp16)[name = string("transpose_474")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_195 = transpose(perm = transpose_195_perm_0, x = q_7_cast_fp16)[name = string("transpose_475")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_195, y = transpose_196)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_304_cast_fp16 = softmax(axis = var_148, x = qk_5_cast_fp16)[name = string("op_304_cast_fp16")];
+            bool var_306_transpose_x_0 = const()[name = string("op_306_transpose_x_0"), val = bool(false)];
+            bool var_306_transpose_y_0 = const()[name = string("op_306_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_9_cast_fp16 = transpose(perm = var_300, x = var_299_cast_fp16)[name = string("transpose_476")];
+            tensor<fp16, [1, 16, ?, 64]> var_306_cast_fp16 = matmul(transpose_x = var_306_transpose_x_0, transpose_y = var_306_transpose_y_0, x = var_304_cast_fp16, y = v_9_cast_fp16)[name = string("op_306_cast_fp16")];
+            tensor<int32, [4]> var_307 = const()[name = string("op_307"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_23x = const()[name = string("concat_23x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_308_cast_fp16 = transpose(perm = var_307, x = var_306_cast_fp16)[name = string("transpose_473")];
+            tensor<fp16, [1, ?, 1024]> x_13_cast_fp16 = reshape(shape = concat_23x, x = var_308_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_312_to_fp16 = const()[name = string("op_312_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121115840)))];
+            tensor<fp16, [1024]> var_313_to_fp16 = const()[name = string("op_313_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123213056)))];
+            tensor<fp16, [1, ?, 1024]> linear_5_cast_fp16 = linear(bias = var_313_to_fp16, weight = var_312_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")];
+            tensor<int32, [1]> var_320_axes_0 = const()[name = string("op_320_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123215168)))];
+            tensor<fp16, [1024]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123217280)))];
+            tensor<fp16, [1, ?, 1024]> var_320_cast_fp16 = layer_norm(axes = var_320_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_154_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_320_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_329_to_fp16 = const()[name = string("op_329_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123219392)))];
+            tensor<fp16, [4096]> var_330_to_fp16 = const()[name = string("op_330_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131608064)))];
+            tensor<fp16, [1, ?, 4096]> linear_6_cast_fp16 = linear(bias = var_330_to_fp16, weight = var_329_to_fp16, x = var_320_cast_fp16)[name = string("linear_6_cast_fp16")];
+            string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_335_to_fp16 = const()[name = string("op_335_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131616320)))];
+            tensor<fp16, [1024]> var_336_to_fp16 = const()[name = string("op_336_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140004992)))];
+            tensor<fp16, [1, ?, 1024]> linear_7_cast_fp16 = linear(bias = var_336_to_fp16, weight = var_335_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_48)[name = string("k_cache_5_cast_fp16")];
+            tensor<int32, [4]> v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_49)[name = string("v_cache_5_cast_fp16")];
+            tensor<int32, [4]> k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")];
+            tensor<int32, [4]> v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")];
+            int32 var_359 = const()[name = string("op_359"), val = int32(-1)];
+            tensor<int32, [1]> var_377_axes_0 = const()[name = string("op_377_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140007104)))];
+            tensor<fp16, [1024]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140009216)))];
+            fp16 var_365_to_fp16 = const()[name = string("op_365_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_377_cast_fp16 = layer_norm(axes = var_377_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_365_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_377_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_388_to_fp16 = const()[name = string("op_388_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140011328)))];
+            tensor<fp16, [1024]> var_389_to_fp16 = const()[name = string("op_389_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142108544)))];
+            tensor<fp16, [1, ?, 1024]> linear_8_cast_fp16 = linear(bias = var_389_to_fp16, weight = var_388_to_fp16, x = var_377_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_392_to_fp16 = const()[name = string("op_392_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142110656)))];
+            tensor<fp16, [1, ?, 1024]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_392_to_fp16, x = var_377_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_396_to_fp16 = const()[name = string("op_396_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144207872)))];
+            tensor<fp16, [1024]> var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146305088)))];
+            tensor<fp16, [1, ?, 1024]> linear_10_cast_fp16 = linear(bias = var_397_to_fp16, weight = var_396_to_fp16, x = var_377_cast_fp16)[name = string("linear_10_cast_fp16")];
+            tensor<int32, [3]> var_399_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_399_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_399_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_399_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_399_shape_cast_fp16_to_uint16 = cast(dtype = var_399_shape_cast_fp16_to_uint16_dtype_0, x = var_399_shape_cast_fp16)[name = string("cast_292")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_399_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_291")];
+            int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")];
+            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")];
+            tensor<int32, [1]> concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor<int32, [1]>([1])];
+            int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
+            bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_48)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_50_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_50 = read_state(input = k_cache1)[name = string("coreml_update_state_50")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_49)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_51_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_51 = read_state(input = v_cache1)[name = string("coreml_update_state_51")];
+            int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)];
+            int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(1024)];
+            int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)];
+            bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")];
+            tensor<int32, [3]> var_415_begin_0 = const()[name = string("op_415_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_415_end_mask_0 = const()[name = string("op_415_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_415_cast_fp16 = slice_by_index(begin = var_415_begin_0, end = concat_32, end_mask = var_415_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_415_cast_fp16")];
+            tensor<int32, [3]> var_418_begin_0 = const()[name = string("op_418_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_418_end_mask_0 = const()[name = string("op_418_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_418_cast_fp16 = slice_by_index(begin = var_418_begin_0, end = concat_32, end_mask = var_418_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_418_cast_fp16")];
+            tensor<int32, [4]> concat_34x = const()[name = string("concat_34x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_428_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_428_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_124_to_fp16 = const()[name = string("const_124_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_11_cast_fp16 = mul(x = var_428_cast_fp16, y = const_124_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> concat_35x = const()[name = string("concat_35x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_435_cast_fp16 = reshape(shape = concat_35x, x = var_415_cast_fp16)[name = string("op_435_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_125_to_fp16 = const()[name = string("const_125_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_15_cast_fp16 = mul(x = var_435_cast_fp16, y = const_125_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> concat_36x = const()[name = string("concat_36x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_442_cast_fp16 = reshape(shape = concat_36x, x = var_418_cast_fp16)[name = string("op_442_cast_fp16")];
+            tensor<int32, [4]> var_443 = const()[name = string("op_443"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_197_perm_0 = const()[name = string("transpose_197_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_198_perm_0 = const()[name = string("transpose_198_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_198 = transpose(perm = transpose_198_perm_0, x = k_15_cast_fp16)[name = string("transpose_470")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_197 = transpose(perm = transpose_197_perm_0, x = q_11_cast_fp16)[name = string("transpose_471")];
+            tensor<fp16, [1, 16, ?, ?]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_197, y = transpose_198)[name = string("qk_7_cast_fp16")];
+            int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)];
+            int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)];
+            bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")];
+            tensor<int32, [2]> var_446_begin_0 = const()[name = string("op_446_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_446_end_mask_0 = const()[name = string("op_446_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_446_cast_fp16 = slice_by_index(begin = var_446_begin_0, end = concat_37, end_mask = var_446_end_mask_0, x = mask_to_fp16)[name = string("op_446_cast_fp16")];
+            int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)];
+            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
+            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")];
+            tensor<int32, [2]> var_447_begin_0 = const()[name = string("op_447_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_447_end_mask_0 = const()[name = string("op_447_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_447_cast_fp16 = slice_by_index(begin = var_447_begin_0, end = concat_38, end_mask = var_447_end_mask_0, x = var_446_cast_fp16)[name = string("op_447_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_447_cast_fp16)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_450_cast_fp16 = softmax(axis = var_359, x = qk_9_cast_fp16)[name = string("op_450_cast_fp16")];
+            bool var_452_transpose_x_0 = const()[name = string("op_452_transpose_x_0"), val = bool(false)];
+            bool var_452_transpose_y_0 = const()[name = string("op_452_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_15_cast_fp16 = transpose(perm = var_443, x = var_442_cast_fp16)[name = string("transpose_472")];
+            tensor<fp16, [1, 16, ?, 64]> var_452_cast_fp16 = matmul(transpose_x = var_452_transpose_x_0, transpose_y = var_452_transpose_y_0, x = var_450_cast_fp16, y = v_15_cast_fp16)[name = string("op_452_cast_fp16")];
+            tensor<int32, [4]> var_453 = const()[name = string("op_453"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_39x = const()[name = string("concat_39x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_454_cast_fp16 = transpose(perm = var_453, x = var_452_cast_fp16)[name = string("transpose_469")];
+            tensor<fp16, [1, ?, 1024]> x_25_cast_fp16 = reshape(shape = concat_39x, x = var_454_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_458_to_fp16 = const()[name = string("op_458_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146307200)))];
+            tensor<fp16, [1024]> var_459_to_fp16 = const()[name = string("op_459_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148404416)))];
+            tensor<fp16, [1, ?, 1024]> linear_11_cast_fp16 = linear(bias = var_459_to_fp16, weight = var_458_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")];
+            tensor<int32, [1]> var_466_axes_0 = const()[name = string("op_466_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148406528)))];
+            tensor<fp16, [1024]> blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148408640)))];
+            tensor<fp16, [1, ?, 1024]> var_466_cast_fp16 = layer_norm(axes = var_466_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_365_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_466_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148410752)))];
+            tensor<fp16, [1024]> var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150507968)))];
+            tensor<fp16, [1, ?, 1024]> linear_12_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = var_466_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<int32, [3]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_42 = const()[name = string("concat_42"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_44x = const()[name = string("concat_44x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_496_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_496_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_126_to_fp16 = const()[name = string("const_126_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_15_cast_fp16 = mul(x = var_496_cast_fp16, y = const_126_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_502 = const()[name = string("op_502"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_503_cast_fp16 = reshape(shape = var_502, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_503_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_127_to_fp16 = const()[name = string("const_127_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_19_cast_fp16 = mul(x = var_503_cast_fp16, y = const_127_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_509 = const()[name = string("op_509"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_510_cast_fp16 = reshape(shape = var_509, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_510_cast_fp16")];
+            tensor<int32, [4]> var_511 = const()[name = string("op_511"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_199_perm_0 = const()[name = string("transpose_199_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_200_perm_0 = const()[name = string("transpose_200_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_200 = transpose(perm = transpose_200_perm_0, x = k_19_cast_fp16)[name = string("transpose_466")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_199 = transpose(perm = transpose_199_perm_0, x = q_15_cast_fp16)[name = string("transpose_467")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_199, y = transpose_200)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_515_cast_fp16 = softmax(axis = var_359, x = qk_11_cast_fp16)[name = string("op_515_cast_fp16")];
+            bool var_517_transpose_x_0 = const()[name = string("op_517_transpose_x_0"), val = bool(false)];
+            bool var_517_transpose_y_0 = const()[name = string("op_517_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_511, x = var_510_cast_fp16)[name = string("transpose_468")];
+            tensor<fp16, [1, 16, ?, 64]> var_517_cast_fp16 = matmul(transpose_x = var_517_transpose_x_0, transpose_y = var_517_transpose_y_0, x = var_515_cast_fp16, y = v_19_cast_fp16)[name = string("op_517_cast_fp16")];
+            tensor<int32, [4]> var_518 = const()[name = string("op_518"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_45x = const()[name = string("concat_45x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_519_cast_fp16 = transpose(perm = var_518, x = var_517_cast_fp16)[name = string("transpose_465")];
+            tensor<fp16, [1, ?, 1024]> x_31_cast_fp16 = reshape(shape = concat_45x, x = var_519_cast_fp16)[name = string("x_31_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_523_to_fp16 = const()[name = string("op_523_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150510080)))];
+            tensor<fp16, [1024]> var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152607296)))];
+            tensor<fp16, [1, ?, 1024]> linear_13_cast_fp16 = linear(bias = var_524_to_fp16, weight = var_523_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")];
+            tensor<int32, [1]> var_531_axes_0 = const()[name = string("op_531_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152609408)))];
+            tensor<fp16, [1024]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152611520)))];
+            tensor<fp16, [1, ?, 1024]> var_531_cast_fp16 = layer_norm(axes = var_531_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_365_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_531_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_540_to_fp16 = const()[name = string("op_540_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152613632)))];
+            tensor<fp16, [4096]> var_541_to_fp16 = const()[name = string("op_541_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161002304)))];
+            tensor<fp16, [1, ?, 4096]> linear_14_cast_fp16 = linear(bias = var_541_to_fp16, weight = var_540_to_fp16, x = var_531_cast_fp16)[name = string("linear_14_cast_fp16")];
+            string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_546_to_fp16 = const()[name = string("op_546_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161010560)))];
+            tensor<fp16, [1024]> var_547_to_fp16 = const()[name = string("op_547_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169399232)))];
+            tensor<fp16, [1, ?, 1024]> linear_15_cast_fp16 = linear(bias = var_547_to_fp16, weight = var_546_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")];
+            tensor<int32, [4]> k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_50)[name = string("k_cache_9_cast_fp16")];
+            tensor<int32, [4]> v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_51)[name = string("v_cache_9_cast_fp16")];
+            tensor<int32, [4]> k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")];
+            tensor<int32, [4]> v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")];
+            int32 var_570 = const()[name = string("op_570"), val = int32(-1)];
+            tensor<int32, [1]> var_588_axes_0 = const()[name = string("op_588_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169401344)))];
+            tensor<fp16, [1024]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169403456)))];
+            fp16 var_576_to_fp16 = const()[name = string("op_576_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_588_cast_fp16 = layer_norm(axes = var_588_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_576_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_588_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_599_to_fp16 = const()[name = string("op_599_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169405568)))];
+            tensor<fp16, [1024]> var_600_to_fp16 = const()[name = string("op_600_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171502784)))];
+            tensor<fp16, [1, ?, 1024]> linear_16_cast_fp16 = linear(bias = var_600_to_fp16, weight = var_599_to_fp16, x = var_588_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171504896)))];
+            tensor<fp16, [1, ?, 1024]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_603_to_fp16, x = var_588_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_607_to_fp16 = const()[name = string("op_607_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173602112)))];
+            tensor<fp16, [1024]> var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175699328)))];
+            tensor<fp16, [1, ?, 1024]> linear_18_cast_fp16 = linear(bias = var_608_to_fp16, weight = var_607_to_fp16, x = var_588_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<int32, [3]> var_610_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_610_shape_cast_fp16")];
+            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
+            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
+            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
+            string var_610_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_610_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_610_shape_cast_fp16_to_uint16 = cast(dtype = var_610_shape_cast_fp16_to_uint16_dtype_0, x = var_610_shape_cast_fp16)[name = string("cast_290")];
+            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_610_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
+            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_289")];
+            int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")];
+            tensor<int32, [1]> expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([2])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")];
+            tensor<int32, [1]> concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)];
+            bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_50)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_52_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_52 = read_state(input = k_cache1)[name = string("coreml_update_state_52")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_51)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_53_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_53 = read_state(input = v_cache1)[name = string("coreml_update_state_53")];
+            int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)];
+            int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(1024)];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")];
+            tensor<int32, [3]> var_626_begin_0 = const()[name = string("op_626_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_626_end_mask_0 = const()[name = string("op_626_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_626_cast_fp16 = slice_by_index(begin = var_626_begin_0, end = concat_54, end_mask = var_626_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_626_cast_fp16")];
+            tensor<int32, [3]> var_629_begin_0 = const()[name = string("op_629_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_629_end_mask_0 = const()[name = string("op_629_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_629_cast_fp16 = slice_by_index(begin = var_629_begin_0, end = concat_54, end_mask = var_629_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_629_cast_fp16")];
+            tensor<int32, [4]> concat_56x = const()[name = string("concat_56x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_639_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_639_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_128_to_fp16 = const()[name = string("const_128_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_19_cast_fp16 = mul(x = var_639_cast_fp16, y = const_128_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> concat_57x = const()[name = string("concat_57x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_646_cast_fp16 = reshape(shape = concat_57x, x = var_626_cast_fp16)[name = string("op_646_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_129_to_fp16 = const()[name = string("const_129_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_25_cast_fp16 = mul(x = var_646_cast_fp16, y = const_129_to_fp16)[name = string("k_25_cast_fp16")];
+            tensor<int32, [4]> concat_58x = const()[name = string("concat_58x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_653_cast_fp16 = reshape(shape = concat_58x, x = var_629_cast_fp16)[name = string("op_653_cast_fp16")];
+            tensor<int32, [4]> var_654 = const()[name = string("op_654"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_201_perm_0 = const()[name = string("transpose_201_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_202_perm_0 = const()[name = string("transpose_202_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_202 = transpose(perm = transpose_202_perm_0, x = k_25_cast_fp16)[name = string("transpose_462")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_201 = transpose(perm = transpose_201_perm_0, x = q_19_cast_fp16)[name = string("transpose_463")];
+            tensor<fp16, [1, 16, ?, ?]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_201, y = transpose_202)[name = string("qk_13_cast_fp16")];
+            int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)];
+            int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)];
+            bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")];
+            tensor<int32, [2]> var_657_begin_0 = const()[name = string("op_657_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_657_end_mask_0 = const()[name = string("op_657_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_657_cast_fp16 = slice_by_index(begin = var_657_begin_0, end = concat_59, end_mask = var_657_end_mask_0, x = mask_to_fp16)[name = string("op_657_cast_fp16")];
+            int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")];
+            tensor<int32, [2]> var_658_begin_0 = const()[name = string("op_658_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_658_end_mask_0 = const()[name = string("op_658_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_658_cast_fp16 = slice_by_index(begin = var_658_begin_0, end = concat_60, end_mask = var_658_end_mask_0, x = var_657_cast_fp16)[name = string("op_658_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_658_cast_fp16)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_661_cast_fp16 = softmax(axis = var_570, x = qk_15_cast_fp16)[name = string("op_661_cast_fp16")];
+            bool var_663_transpose_x_0 = const()[name = string("op_663_transpose_x_0"), val = bool(false)];
+            bool var_663_transpose_y_0 = const()[name = string("op_663_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_25_cast_fp16 = transpose(perm = var_654, x = var_653_cast_fp16)[name = string("transpose_464")];
+            tensor<fp16, [1, 16, ?, 64]> var_663_cast_fp16 = matmul(transpose_x = var_663_transpose_x_0, transpose_y = var_663_transpose_y_0, x = var_661_cast_fp16, y = v_25_cast_fp16)[name = string("op_663_cast_fp16")];
+            tensor<int32, [4]> var_664 = const()[name = string("op_664"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_61x = const()[name = string("concat_61x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_665_cast_fp16 = transpose(perm = var_664, x = var_663_cast_fp16)[name = string("transpose_461")];
+            tensor<fp16, [1, ?, 1024]> x_43_cast_fp16 = reshape(shape = concat_61x, x = var_665_cast_fp16)[name = string("x_43_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175701440)))];
+            tensor<fp16, [1024]> var_670_to_fp16 = const()[name = string("op_670_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177798656)))];
+            tensor<fp16, [1, ?, 1024]> linear_19_cast_fp16 = linear(bias = var_670_to_fp16, weight = var_669_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")];
+            tensor<int32, [1]> var_677_axes_0 = const()[name = string("op_677_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177800768)))];
+            tensor<fp16, [1024]> blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177802880)))];
+            tensor<fp16, [1, ?, 1024]> var_677_cast_fp16 = layer_norm(axes = var_677_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_576_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_677_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177804992)))];
+            tensor<fp16, [1024]> var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179902208)))];
+            tensor<fp16, [1, ?, 1024]> linear_20_cast_fp16 = linear(bias = var_687_to_fp16, weight = var_686_to_fp16, x = var_677_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [3]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_63 = const()[name = string("concat_63"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_64 = const()[name = string("concat_64"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_66x = const()[name = string("concat_66x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_707_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_707_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_130_to_fp16 = const()[name = string("const_130_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_23_cast_fp16 = mul(x = var_707_cast_fp16, y = const_130_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_713 = const()[name = string("op_713"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_714_cast_fp16 = reshape(shape = var_713, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_714_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_131_to_fp16 = const()[name = string("const_131_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_29_cast_fp16 = mul(x = var_714_cast_fp16, y = const_131_to_fp16)[name = string("k_29_cast_fp16")];
+            tensor<int32, [4]> var_720 = const()[name = string("op_720"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_721_cast_fp16 = reshape(shape = var_720, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_721_cast_fp16")];
+            tensor<int32, [4]> var_722 = const()[name = string("op_722"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_203_perm_0 = const()[name = string("transpose_203_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_204_perm_0 = const()[name = string("transpose_204_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_204 = transpose(perm = transpose_204_perm_0, x = k_29_cast_fp16)[name = string("transpose_458")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_203 = transpose(perm = transpose_203_perm_0, x = q_23_cast_fp16)[name = string("transpose_459")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_203, y = transpose_204)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_726_cast_fp16 = softmax(axis = var_570, x = qk_17_cast_fp16)[name = string("op_726_cast_fp16")];
+            bool var_728_transpose_x_0 = const()[name = string("op_728_transpose_x_0"), val = bool(false)];
+            bool var_728_transpose_y_0 = const()[name = string("op_728_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_29_cast_fp16 = transpose(perm = var_722, x = var_721_cast_fp16)[name = string("transpose_460")];
+            tensor<fp16, [1, 16, ?, 64]> var_728_cast_fp16 = matmul(transpose_x = var_728_transpose_x_0, transpose_y = var_728_transpose_y_0, x = var_726_cast_fp16, y = v_29_cast_fp16)[name = string("op_728_cast_fp16")];
+            tensor<int32, [4]> var_729 = const()[name = string("op_729"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_67x = const()[name = string("concat_67x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_730_cast_fp16 = transpose(perm = var_729, x = var_728_cast_fp16)[name = string("transpose_457")];
+            tensor<fp16, [1, ?, 1024]> x_49_cast_fp16 = reshape(shape = concat_67x, x = var_730_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_734_to_fp16 = const()[name = string("op_734_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179904320)))];
+            tensor<fp16, [1024]> var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182001536)))];
+            tensor<fp16, [1, ?, 1024]> linear_21_cast_fp16 = linear(bias = var_735_to_fp16, weight = var_734_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")];
+            tensor<int32, [1]> var_742_axes_0 = const()[name = string("op_742_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182003648)))];
+            tensor<fp16, [1024]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182005760)))];
+            tensor<fp16, [1, ?, 1024]> var_742_cast_fp16 = layer_norm(axes = var_742_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_576_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_742_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_751_to_fp16 = const()[name = string("op_751_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182007872)))];
+            tensor<fp16, [4096]> var_752_to_fp16 = const()[name = string("op_752_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190396544)))];
+            tensor<fp16, [1, ?, 4096]> linear_22_cast_fp16 = linear(bias = var_752_to_fp16, weight = var_751_to_fp16, x = var_742_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_757_to_fp16 = const()[name = string("op_757_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190404800)))];
+            tensor<fp16, [1024]> var_758_to_fp16 = const()[name = string("op_758_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198793472)))];
+            tensor<fp16, [1, ?, 1024]> linear_23_cast_fp16 = linear(bias = var_758_to_fp16, weight = var_757_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")];
+            tensor<int32, [4]> k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_52)[name = string("k_cache_13_cast_fp16")];
+            tensor<int32, [4]> v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_53)[name = string("v_cache_13_cast_fp16")];
+            tensor<int32, [4]> k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")];
+            tensor<int32, [4]> v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")];
+            int32 var_781 = const()[name = string("op_781"), val = int32(-1)];
+            tensor<int32, [1]> var_799_axes_0 = const()[name = string("op_799_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198795584)))];
+            tensor<fp16, [1024]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198797696)))];
+            fp16 var_787_to_fp16 = const()[name = string("op_787_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_799_cast_fp16 = layer_norm(axes = var_799_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_787_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_799_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_810_to_fp16 = const()[name = string("op_810_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198799808)))];
+            tensor<fp16, [1024]> var_811_to_fp16 = const()[name = string("op_811_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200897024)))];
+            tensor<fp16, [1, ?, 1024]> linear_24_cast_fp16 = linear(bias = var_811_to_fp16, weight = var_810_to_fp16, x = var_799_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_814_to_fp16 = const()[name = string("op_814_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200899136)))];
+            tensor<fp16, [1, ?, 1024]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_814_to_fp16, x = var_799_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_818_to_fp16 = const()[name = string("op_818_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202996352)))];
+            tensor<fp16, [1024]> var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205093568)))];
+            tensor<fp16, [1, ?, 1024]> linear_26_cast_fp16 = linear(bias = var_819_to_fp16, weight = var_818_to_fp16, x = var_799_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [3]> var_821_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_821_shape_cast_fp16")];
+            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
+            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
+            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
+            string var_821_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_821_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_821_shape_cast_fp16_to_uint16 = cast(dtype = var_821_shape_cast_fp16_to_uint16_dtype_0, x = var_821_shape_cast_fp16)[name = string("cast_288")];
+            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_821_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
+            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_287")];
+            int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")];
+            tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")];
+            tensor<int32, [1]> concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor<int32, [1]>([3])];
+            int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)];
+            bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")];
+            tensor<int32, [1]> concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)];
+            bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_52)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_54_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_54 = read_state(input = k_cache1)[name = string("coreml_update_state_54")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_53)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_55_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_55 = read_state(input = v_cache1)[name = string("coreml_update_state_55")];
+            int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)];
+            int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(1024)];
+            int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)];
+            bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")];
+            tensor<int32, [3]> var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = concat_76, end_mask = var_837_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_837_cast_fp16")];
+            tensor<int32, [3]> var_840_begin_0 = const()[name = string("op_840_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_840_end_mask_0 = const()[name = string("op_840_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_840_cast_fp16 = slice_by_index(begin = var_840_begin_0, end = concat_76, end_mask = var_840_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_840_cast_fp16")];
+            tensor<int32, [4]> concat_78x = const()[name = string("concat_78x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_850_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_850_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_132_to_fp16 = const()[name = string("const_132_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_27_cast_fp16 = mul(x = var_850_cast_fp16, y = const_132_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> concat_79x = const()[name = string("concat_79x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_857_cast_fp16 = reshape(shape = concat_79x, x = var_837_cast_fp16)[name = string("op_857_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_133_to_fp16 = const()[name = string("const_133_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_35_cast_fp16 = mul(x = var_857_cast_fp16, y = const_133_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> concat_80x = const()[name = string("concat_80x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_864_cast_fp16 = reshape(shape = concat_80x, x = var_840_cast_fp16)[name = string("op_864_cast_fp16")];
+            tensor<int32, [4]> var_865 = const()[name = string("op_865"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_205_perm_0 = const()[name = string("transpose_205_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_206_perm_0 = const()[name = string("transpose_206_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_206 = transpose(perm = transpose_206_perm_0, x = k_35_cast_fp16)[name = string("transpose_454")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_205 = transpose(perm = transpose_205_perm_0, x = q_27_cast_fp16)[name = string("transpose_455")];
+            tensor<fp16, [1, 16, ?, ?]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_205, y = transpose_206)[name = string("qk_19_cast_fp16")];
+            int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)];
+            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
+            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")];
+            tensor<int32, [2]> var_868_begin_0 = const()[name = string("op_868_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_868_end_mask_0 = const()[name = string("op_868_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_868_cast_fp16 = slice_by_index(begin = var_868_begin_0, end = concat_81, end_mask = var_868_end_mask_0, x = mask_to_fp16)[name = string("op_868_cast_fp16")];
+            int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)];
+            int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)];
+            bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")];
+            tensor<int32, [2]> var_869_begin_0 = const()[name = string("op_869_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_869_end_mask_0 = const()[name = string("op_869_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = concat_82, end_mask = var_869_end_mask_0, x = var_868_cast_fp16)[name = string("op_869_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_869_cast_fp16)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_872_cast_fp16 = softmax(axis = var_781, x = qk_21_cast_fp16)[name = string("op_872_cast_fp16")];
+            bool var_874_transpose_x_0 = const()[name = string("op_874_transpose_x_0"), val = bool(false)];
+            bool var_874_transpose_y_0 = const()[name = string("op_874_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_35_cast_fp16 = transpose(perm = var_865, x = var_864_cast_fp16)[name = string("transpose_456")];
+            tensor<fp16, [1, 16, ?, 64]> var_874_cast_fp16 = matmul(transpose_x = var_874_transpose_x_0, transpose_y = var_874_transpose_y_0, x = var_872_cast_fp16, y = v_35_cast_fp16)[name = string("op_874_cast_fp16")];
+            tensor<int32, [4]> var_875 = const()[name = string("op_875"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_83x = const()[name = string("concat_83x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_876_cast_fp16 = transpose(perm = var_875, x = var_874_cast_fp16)[name = string("transpose_453")];
+            tensor<fp16, [1, ?, 1024]> x_61_cast_fp16 = reshape(shape = concat_83x, x = var_876_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_880_to_fp16 = const()[name = string("op_880_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205095680)))];
+            tensor<fp16, [1024]> var_881_to_fp16 = const()[name = string("op_881_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207192896)))];
+            tensor<fp16, [1, ?, 1024]> linear_27_cast_fp16 = linear(bias = var_881_to_fp16, weight = var_880_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")];
+            tensor<int32, [1]> var_888_axes_0 = const()[name = string("op_888_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207195008)))];
+            tensor<fp16, [1024]> blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207197120)))];
+            tensor<fp16, [1, ?, 1024]> var_888_cast_fp16 = layer_norm(axes = var_888_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_787_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_888_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207199232)))];
+            tensor<fp16, [1024]> var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209296448)))];
+            tensor<fp16, [1, ?, 1024]> linear_28_cast_fp16 = linear(bias = var_898_to_fp16, weight = var_897_to_fp16, x = var_888_cast_fp16)[name = string("linear_28_cast_fp16")];
+            tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_87 = const()[name = string("concat_87"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_88x = const()[name = string("concat_88x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_918_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_918_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_134_to_fp16 = const()[name = string("const_134_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_31_cast_fp16 = mul(x = var_918_cast_fp16, y = const_134_to_fp16)[name = string("q_31_cast_fp16")];
+            tensor<int32, [4]> var_924 = const()[name = string("op_924"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_925_cast_fp16 = reshape(shape = var_924, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_925_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_135_to_fp16 = const()[name = string("const_135_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_39_cast_fp16 = mul(x = var_925_cast_fp16, y = const_135_to_fp16)[name = string("k_39_cast_fp16")];
+            tensor<int32, [4]> var_931 = const()[name = string("op_931"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_932_cast_fp16 = reshape(shape = var_931, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_932_cast_fp16")];
+            tensor<int32, [4]> var_933 = const()[name = string("op_933"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)];
+            bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_207_perm_0 = const()[name = string("transpose_207_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_208_perm_0 = const()[name = string("transpose_208_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_208 = transpose(perm = transpose_208_perm_0, x = k_39_cast_fp16)[name = string("transpose_450")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_207 = transpose(perm = transpose_207_perm_0, x = q_31_cast_fp16)[name = string("transpose_451")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_207, y = transpose_208)[name = string("qk_23_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_937_cast_fp16 = softmax(axis = var_781, x = qk_23_cast_fp16)[name = string("op_937_cast_fp16")];
+            bool var_939_transpose_x_0 = const()[name = string("op_939_transpose_x_0"), val = bool(false)];
+            bool var_939_transpose_y_0 = const()[name = string("op_939_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_933, x = var_932_cast_fp16)[name = string("transpose_452")];
+            tensor<fp16, [1, 16, ?, 64]> var_939_cast_fp16 = matmul(transpose_x = var_939_transpose_x_0, transpose_y = var_939_transpose_y_0, x = var_937_cast_fp16, y = v_39_cast_fp16)[name = string("op_939_cast_fp16")];
+            tensor<int32, [4]> var_940 = const()[name = string("op_940"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_89x = const()[name = string("concat_89x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_941_cast_fp16 = transpose(perm = var_940, x = var_939_cast_fp16)[name = string("transpose_449")];
+            tensor<fp16, [1, ?, 1024]> x_67_cast_fp16 = reshape(shape = concat_89x, x = var_941_cast_fp16)[name = string("x_67_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_945_to_fp16 = const()[name = string("op_945_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209298560)))];
+            tensor<fp16, [1024]> var_946_to_fp16 = const()[name = string("op_946_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211395776)))];
+            tensor<fp16, [1, ?, 1024]> linear_29_cast_fp16 = linear(bias = var_946_to_fp16, weight = var_945_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")];
+            tensor<int32, [1]> var_953_axes_0 = const()[name = string("op_953_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211397888)))];
+            tensor<fp16, [1024]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211400000)))];
+            tensor<fp16, [1, ?, 1024]> var_953_cast_fp16 = layer_norm(axes = var_953_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_787_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_953_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_962_to_fp16 = const()[name = string("op_962_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211402112)))];
+            tensor<fp16, [4096]> var_963_to_fp16 = const()[name = string("op_963_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219790784)))];
+            tensor<fp16, [1, ?, 4096]> linear_30_cast_fp16 = linear(bias = var_963_to_fp16, weight = var_962_to_fp16, x = var_953_cast_fp16)[name = string("linear_30_cast_fp16")];
+            string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_968_to_fp16 = const()[name = string("op_968_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219799040)))];
+            tensor<fp16, [1024]> var_969_to_fp16 = const()[name = string("op_969_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228187712)))];
+            tensor<fp16, [1, ?, 1024]> linear_31_cast_fp16 = linear(bias = var_969_to_fp16, weight = var_968_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")];
+            tensor<int32, [4]> k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_54)[name = string("k_cache_17_cast_fp16")];
+            tensor<int32, [4]> v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_55)[name = string("v_cache_17_cast_fp16")];
+            tensor<int32, [4]> k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")];
+            tensor<int32, [4]> v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")];
+            int32 var_992 = const()[name = string("op_992"), val = int32(-1)];
+            tensor<int32, [1]> var_1010_axes_0 = const()[name = string("op_1010_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228189824)))];
+            tensor<fp16, [1024]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228191936)))];
+            fp16 var_998_to_fp16 = const()[name = string("op_998_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_1010_cast_fp16 = layer_norm(axes = var_1010_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_998_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_1010_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1021_to_fp16 = const()[name = string("op_1021_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228194048)))];
+            tensor<fp16, [1024]> var_1022_to_fp16 = const()[name = string("op_1022_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230291264)))];
+            tensor<fp16, [1, ?, 1024]> linear_32_cast_fp16 = linear(bias = var_1022_to_fp16, weight = var_1021_to_fp16, x = var_1010_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1025_to_fp16 = const()[name = string("op_1025_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230293376)))];
+            tensor<fp16, [1, ?, 1024]> linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1025_to_fp16, x = var_1010_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1029_to_fp16 = const()[name = string("op_1029_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232390592)))];
+            tensor<fp16, [1024]> var_1030_to_fp16 = const()[name = string("op_1030_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234487808)))];
+            tensor<fp16, [1, ?, 1024]> linear_34_cast_fp16 = linear(bias = var_1030_to_fp16, weight = var_1029_to_fp16, x = var_1010_cast_fp16)[name = string("linear_34_cast_fp16")];
+            tensor<int32, [3]> var_1032_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_1032_shape_cast_fp16")];
+            int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)];
+            int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)];
+            bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)];
+            string var_1032_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1032_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1032_shape_cast_fp16_to_uint16 = cast(dtype = var_1032_shape_cast_fp16_to_uint16_dtype_0, x = var_1032_shape_cast_fp16)[name = string("cast_286")];
+            uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_1032_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")];
+            string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_285")];
+            int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")];
+            tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")];
+            tensor<int32, [1]> concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor<int32, [1]>([4])];
+            int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)];
+            bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")];
+            tensor<int32, [1]> concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
+            bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_54)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_56_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_56 = read_state(input = k_cache1)[name = string("coreml_update_state_56")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_55)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_57_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_57 = read_state(input = v_cache1)[name = string("coreml_update_state_57")];
+            int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)];
+            int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(1024)];
+            int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)];
+            bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")];
+            tensor<int32, [3]> var_1048_begin_0 = const()[name = string("op_1048_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1048_end_mask_0 = const()[name = string("op_1048_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1048_cast_fp16 = slice_by_index(begin = var_1048_begin_0, end = concat_98, end_mask = var_1048_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1048_cast_fp16")];
+            tensor<int32, [3]> var_1051_begin_0 = const()[name = string("op_1051_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1051_end_mask_0 = const()[name = string("op_1051_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1051_cast_fp16 = slice_by_index(begin = var_1051_begin_0, end = concat_98, end_mask = var_1051_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1051_cast_fp16")];
+            tensor<int32, [4]> concat_100x = const()[name = string("concat_100x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1061_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1061_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_136_to_fp16 = const()[name = string("const_136_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_35_cast_fp16 = mul(x = var_1061_cast_fp16, y = const_136_to_fp16)[name = string("q_35_cast_fp16")];
+            tensor<int32, [4]> concat_101x = const()[name = string("concat_101x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1068_cast_fp16 = reshape(shape = concat_101x, x = var_1048_cast_fp16)[name = string("op_1068_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_137_to_fp16 = const()[name = string("const_137_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_45_cast_fp16 = mul(x = var_1068_cast_fp16, y = const_137_to_fp16)[name = string("k_45_cast_fp16")];
+            tensor<int32, [4]> concat_102x = const()[name = string("concat_102x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1075_cast_fp16 = reshape(shape = concat_102x, x = var_1051_cast_fp16)[name = string("op_1075_cast_fp16")];
+            tensor<int32, [4]> var_1076 = const()[name = string("op_1076"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)];
+            bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_209_perm_0 = const()[name = string("transpose_209_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_210_perm_0 = const()[name = string("transpose_210_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_210 = transpose(perm = transpose_210_perm_0, x = k_45_cast_fp16)[name = string("transpose_446")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_209 = transpose(perm = transpose_209_perm_0, x = q_35_cast_fp16)[name = string("transpose_447")];
+            tensor<fp16, [1, 16, ?, ?]> qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_209, y = transpose_210)[name = string("qk_25_cast_fp16")];
+            int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)];
+            int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)];
+            bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")];
+            tensor<int32, [2]> var_1079_begin_0 = const()[name = string("op_1079_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1079_end_mask_0 = const()[name = string("op_1079_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1079_cast_fp16 = slice_by_index(begin = var_1079_begin_0, end = concat_103, end_mask = var_1079_end_mask_0, x = mask_to_fp16)[name = string("op_1079_cast_fp16")];
+            int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)];
+            int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)];
+            bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")];
+            tensor<int32, [2]> var_1080_begin_0 = const()[name = string("op_1080_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1080_end_mask_0 = const()[name = string("op_1080_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1080_cast_fp16 = slice_by_index(begin = var_1080_begin_0, end = concat_104, end_mask = var_1080_end_mask_0, x = var_1079_cast_fp16)[name = string("op_1080_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1080_cast_fp16)[name = string("qk_27_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_1083_cast_fp16 = softmax(axis = var_992, x = qk_27_cast_fp16)[name = string("op_1083_cast_fp16")];
+            bool var_1085_transpose_x_0 = const()[name = string("op_1085_transpose_x_0"), val = bool(false)];
+            bool var_1085_transpose_y_0 = const()[name = string("op_1085_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_45_cast_fp16 = transpose(perm = var_1076, x = var_1075_cast_fp16)[name = string("transpose_448")];
+            tensor<fp16, [1, 16, ?, 64]> var_1085_cast_fp16 = matmul(transpose_x = var_1085_transpose_x_0, transpose_y = var_1085_transpose_y_0, x = var_1083_cast_fp16, y = v_45_cast_fp16)[name = string("op_1085_cast_fp16")];
+            tensor<int32, [4]> var_1086 = const()[name = string("op_1086"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_105x = const()[name = string("concat_105x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1087_cast_fp16 = transpose(perm = var_1086, x = var_1085_cast_fp16)[name = string("transpose_445")];
+            tensor<fp16, [1, ?, 1024]> x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1087_cast_fp16)[name = string("x_79_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234489920)))];
+            tensor<fp16, [1024]> var_1092_to_fp16 = const()[name = string("op_1092_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236587136)))];
+            tensor<fp16, [1, ?, 1024]> linear_35_cast_fp16 = linear(bias = var_1092_to_fp16, weight = var_1091_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")];
+            tensor<int32, [1]> var_1099_axes_0 = const()[name = string("op_1099_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236589248)))];
+            tensor<fp16, [1024]> blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236591360)))];
+            tensor<fp16, [1, ?, 1024]> var_1099_cast_fp16 = layer_norm(axes = var_1099_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_998_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1099_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236593472)))];
+            tensor<fp16, [1024]> var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238690688)))];
+            tensor<fp16, [1, ?, 1024]> linear_36_cast_fp16 = linear(bias = var_1109_to_fp16, weight = var_1108_to_fp16, x = var_1099_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<int32, [3]> concat_106 = const()[name = string("concat_106"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_108 = const()[name = string("concat_108"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_109 = const()[name = string("concat_109"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_110x = const()[name = string("concat_110x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1129_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1129_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_138_to_fp16 = const()[name = string("const_138_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_39_cast_fp16 = mul(x = var_1129_cast_fp16, y = const_138_to_fp16)[name = string("q_39_cast_fp16")];
+            tensor<int32, [4]> var_1135 = const()[name = string("op_1135"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1136_cast_fp16 = reshape(shape = var_1135, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1136_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_139_to_fp16 = const()[name = string("const_139_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_49_cast_fp16 = mul(x = var_1136_cast_fp16, y = const_139_to_fp16)[name = string("k_49_cast_fp16")];
+            tensor<int32, [4]> var_1142 = const()[name = string("op_1142"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1143_cast_fp16 = reshape(shape = var_1142, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1143_cast_fp16")];
+            tensor<int32, [4]> var_1144 = const()[name = string("op_1144"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)];
+            bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_211_perm_0 = const()[name = string("transpose_211_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_212_perm_0 = const()[name = string("transpose_212_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_212 = transpose(perm = transpose_212_perm_0, x = k_49_cast_fp16)[name = string("transpose_442")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_211 = transpose(perm = transpose_211_perm_0, x = q_39_cast_fp16)[name = string("transpose_443")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_211, y = transpose_212)[name = string("qk_29_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_1148_cast_fp16 = softmax(axis = var_992, x = qk_29_cast_fp16)[name = string("op_1148_cast_fp16")];
+            bool var_1150_transpose_x_0 = const()[name = string("op_1150_transpose_x_0"), val = bool(false)];
+            bool var_1150_transpose_y_0 = const()[name = string("op_1150_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_49_cast_fp16 = transpose(perm = var_1144, x = var_1143_cast_fp16)[name = string("transpose_444")];
+            tensor<fp16, [1, 16, ?, 64]> var_1150_cast_fp16 = matmul(transpose_x = var_1150_transpose_x_0, transpose_y = var_1150_transpose_y_0, x = var_1148_cast_fp16, y = v_49_cast_fp16)[name = string("op_1150_cast_fp16")];
+            tensor<int32, [4]> var_1151 = const()[name = string("op_1151"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_111x = const()[name = string("concat_111x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1152_cast_fp16 = transpose(perm = var_1151, x = var_1150_cast_fp16)[name = string("transpose_441")];
+            tensor<fp16, [1, ?, 1024]> x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1152_cast_fp16)[name = string("x_85_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1156_to_fp16 = const()[name = string("op_1156_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238692800)))];
+            tensor<fp16, [1024]> var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240790016)))];
+            tensor<fp16, [1, ?, 1024]> linear_37_cast_fp16 = linear(bias = var_1157_to_fp16, weight = var_1156_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")];
+            tensor<int32, [1]> var_1164_axes_0 = const()[name = string("op_1164_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240792128)))];
+            tensor<fp16, [1024]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240794240)))];
+            tensor<fp16, [1, ?, 1024]> var_1164_cast_fp16 = layer_norm(axes = var_1164_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_998_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1164_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240796352)))];
+            tensor<fp16, [4096]> var_1174_to_fp16 = const()[name = string("op_1174_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249185024)))];
+            tensor<fp16, [1, ?, 4096]> linear_38_cast_fp16 = linear(bias = var_1174_to_fp16, weight = var_1173_to_fp16, x = var_1164_cast_fp16)[name = string("linear_38_cast_fp16")];
+            string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1179_to_fp16 = const()[name = string("op_1179_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249193280)))];
+            tensor<fp16, [1024]> var_1180_to_fp16 = const()[name = string("op_1180_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257581952)))];
+            tensor<fp16, [1, ?, 1024]> linear_39_cast_fp16 = linear(bias = var_1180_to_fp16, weight = var_1179_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")];
+            tensor<int32, [4]> k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_56)[name = string("k_cache_21_cast_fp16")];
+            tensor<int32, [4]> v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_57)[name = string("v_cache_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")];
+            tensor<int32, [4]> v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")];
+            int32 var_1203 = const()[name = string("op_1203"), val = int32(-1)];
+            tensor<int32, [1]> var_1221_axes_0 = const()[name = string("op_1221_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257584064)))];
+            tensor<fp16, [1024]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257586176)))];
+            fp16 var_1209_to_fp16 = const()[name = string("op_1209_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_1221_cast_fp16 = layer_norm(axes = var_1221_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1209_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1221_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1232_to_fp16 = const()[name = string("op_1232_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257588288)))];
+            tensor<fp16, [1024]> var_1233_to_fp16 = const()[name = string("op_1233_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259685504)))];
+            tensor<fp16, [1, ?, 1024]> linear_40_cast_fp16 = linear(bias = var_1233_to_fp16, weight = var_1232_to_fp16, x = var_1221_cast_fp16)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1236_to_fp16 = const()[name = string("op_1236_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259687616)))];
+            tensor<fp16, [1, ?, 1024]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1236_to_fp16, x = var_1221_cast_fp16)[name = string("linear_41_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1240_to_fp16 = const()[name = string("op_1240_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261784832)))];
+            tensor<fp16, [1024]> var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263882048)))];
+            tensor<fp16, [1, ?, 1024]> linear_42_cast_fp16 = linear(bias = var_1241_to_fp16, weight = var_1240_to_fp16, x = var_1221_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<int32, [3]> var_1243_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1243_shape_cast_fp16")];
+            int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)];
+            int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)];
+            bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)];
+            string var_1243_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1243_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1243_shape_cast_fp16_to_uint16 = cast(dtype = var_1243_shape_cast_fp16_to_uint16_dtype_0, x = var_1243_shape_cast_fp16)[name = string("cast_284")];
+            uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1243_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")];
+            string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_283")];
+            int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")];
+            tensor<int32, [1]> expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")];
+            tensor<int32, [1]> concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor<int32, [1]>([5])];
+            int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
+            bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")];
+            tensor<int32, [1]> concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)];
+            bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_56)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_58_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_58 = read_state(input = k_cache1)[name = string("coreml_update_state_58")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_57)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_59_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_59 = read_state(input = v_cache1)[name = string("coreml_update_state_59")];
+            int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)];
+            int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(1024)];
+            int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)];
+            bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")];
+            tensor<int32, [3]> var_1259_begin_0 = const()[name = string("op_1259_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1259_end_mask_0 = const()[name = string("op_1259_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1259_cast_fp16 = slice_by_index(begin = var_1259_begin_0, end = concat_120, end_mask = var_1259_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1259_cast_fp16")];
+            tensor<int32, [3]> var_1262_begin_0 = const()[name = string("op_1262_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1262_end_mask_0 = const()[name = string("op_1262_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1262_cast_fp16 = slice_by_index(begin = var_1262_begin_0, end = concat_120, end_mask = var_1262_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1262_cast_fp16")];
+            tensor<int32, [4]> concat_122x = const()[name = string("concat_122x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1272_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1272_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_140_to_fp16 = const()[name = string("const_140_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_43_cast_fp16 = mul(x = var_1272_cast_fp16, y = const_140_to_fp16)[name = string("q_43_cast_fp16")];
+            tensor<int32, [4]> concat_123x = const()[name = string("concat_123x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1279_cast_fp16 = reshape(shape = concat_123x, x = var_1259_cast_fp16)[name = string("op_1279_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_141_to_fp16 = const()[name = string("const_141_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_55_cast_fp16 = mul(x = var_1279_cast_fp16, y = const_141_to_fp16)[name = string("k_55_cast_fp16")];
+            tensor<int32, [4]> concat_124x = const()[name = string("concat_124x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1286_cast_fp16 = reshape(shape = concat_124x, x = var_1262_cast_fp16)[name = string("op_1286_cast_fp16")];
+            tensor<int32, [4]> var_1287 = const()[name = string("op_1287"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)];
+            bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_213_perm_0 = const()[name = string("transpose_213_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_214_perm_0 = const()[name = string("transpose_214_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_214 = transpose(perm = transpose_214_perm_0, x = k_55_cast_fp16)[name = string("transpose_438")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_213 = transpose(perm = transpose_213_perm_0, x = q_43_cast_fp16)[name = string("transpose_439")];
+            tensor<fp16, [1, 16, ?, ?]> qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_213, y = transpose_214)[name = string("qk_31_cast_fp16")];
+            int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)];
+            int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)];
+            bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")];
+            tensor<int32, [2]> var_1290_begin_0 = const()[name = string("op_1290_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1290_end_mask_0 = const()[name = string("op_1290_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1290_cast_fp16 = slice_by_index(begin = var_1290_begin_0, end = concat_125, end_mask = var_1290_end_mask_0, x = mask_to_fp16)[name = string("op_1290_cast_fp16")];
+            int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)];
+            int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)];
+            bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")];
+            tensor<int32, [2]> var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = concat_126, end_mask = var_1291_end_mask_0, x = var_1290_cast_fp16)[name = string("op_1291_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1291_cast_fp16)[name = string("qk_33_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_1294_cast_fp16 = softmax(axis = var_1203, x = qk_33_cast_fp16)[name = string("op_1294_cast_fp16")];
+            bool var_1296_transpose_x_0 = const()[name = string("op_1296_transpose_x_0"), val = bool(false)];
+            bool var_1296_transpose_y_0 = const()[name = string("op_1296_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_55_cast_fp16 = transpose(perm = var_1287, x = var_1286_cast_fp16)[name = string("transpose_440")];
+            tensor<fp16, [1, 16, ?, 64]> var_1296_cast_fp16 = matmul(transpose_x = var_1296_transpose_x_0, transpose_y = var_1296_transpose_y_0, x = var_1294_cast_fp16, y = v_55_cast_fp16)[name = string("op_1296_cast_fp16")];
+            tensor<int32, [4]> var_1297 = const()[name = string("op_1297"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_127x = const()[name = string("concat_127x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1298_cast_fp16 = transpose(perm = var_1297, x = var_1296_cast_fp16)[name = string("transpose_437")];
+            tensor<fp16, [1, ?, 1024]> x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1298_cast_fp16)[name = string("x_97_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1302_to_fp16 = const()[name = string("op_1302_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263884160)))];
+            tensor<fp16, [1024]> var_1303_to_fp16 = const()[name = string("op_1303_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265981376)))];
+            tensor<fp16, [1, ?, 1024]> linear_43_cast_fp16 = linear(bias = var_1303_to_fp16, weight = var_1302_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")];
+            tensor<int32, [1]> var_1310_axes_0 = const()[name = string("op_1310_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265983488)))];
+            tensor<fp16, [1024]> blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265985600)))];
+            tensor<fp16, [1, ?, 1024]> var_1310_cast_fp16 = layer_norm(axes = var_1310_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1209_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1310_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265987712)))];
+            tensor<fp16, [1024]> var_1320_to_fp16 = const()[name = string("op_1320_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268084928)))];
+            tensor<fp16, [1, ?, 1024]> linear_44_cast_fp16 = linear(bias = var_1320_to_fp16, weight = var_1319_to_fp16, x = var_1310_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [3]> concat_128 = const()[name = string("concat_128"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_129 = const()[name = string("concat_129"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_130 = const()[name = string("concat_130"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_131 = const()[name = string("concat_131"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_132x = const()[name = string("concat_132x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1340_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1340_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_142_to_fp16 = const()[name = string("const_142_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_47_cast_fp16 = mul(x = var_1340_cast_fp16, y = const_142_to_fp16)[name = string("q_47_cast_fp16")];
+            tensor<int32, [4]> var_1346 = const()[name = string("op_1346"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1347_cast_fp16 = reshape(shape = var_1346, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1347_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_143_to_fp16 = const()[name = string("const_143_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_59_cast_fp16 = mul(x = var_1347_cast_fp16, y = const_143_to_fp16)[name = string("k_59_cast_fp16")];
+            tensor<int32, [4]> var_1353 = const()[name = string("op_1353"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1354_cast_fp16 = reshape(shape = var_1353, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1354_cast_fp16")];
+            tensor<int32, [4]> var_1355 = const()[name = string("op_1355"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)];
+            bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_215_perm_0 = const()[name = string("transpose_215_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_216_perm_0 = const()[name = string("transpose_216_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_216 = transpose(perm = transpose_216_perm_0, x = k_59_cast_fp16)[name = string("transpose_434")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_215 = transpose(perm = transpose_215_perm_0, x = q_47_cast_fp16)[name = string("transpose_435")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_215, y = transpose_216)[name = string("qk_35_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_1359_cast_fp16 = softmax(axis = var_1203, x = qk_35_cast_fp16)[name = string("op_1359_cast_fp16")];
+            bool var_1361_transpose_x_0 = const()[name = string("op_1361_transpose_x_0"), val = bool(false)];
+            bool var_1361_transpose_y_0 = const()[name = string("op_1361_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_59_cast_fp16 = transpose(perm = var_1355, x = var_1354_cast_fp16)[name = string("transpose_436")];
+            tensor<fp16, [1, 16, ?, 64]> var_1361_cast_fp16 = matmul(transpose_x = var_1361_transpose_x_0, transpose_y = var_1361_transpose_y_0, x = var_1359_cast_fp16, y = v_59_cast_fp16)[name = string("op_1361_cast_fp16")];
+            tensor<int32, [4]> var_1362 = const()[name = string("op_1362"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_133x = const()[name = string("concat_133x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1363_cast_fp16 = transpose(perm = var_1362, x = var_1361_cast_fp16)[name = string("transpose_433")];
+            tensor<fp16, [1, ?, 1024]> x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1363_cast_fp16)[name = string("x_103_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1367_to_fp16 = const()[name = string("op_1367_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268087040)))];
+            tensor<fp16, [1024]> var_1368_to_fp16 = const()[name = string("op_1368_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270184256)))];
+            tensor<fp16, [1, ?, 1024]> linear_45_cast_fp16 = linear(bias = var_1368_to_fp16, weight = var_1367_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")];
+            tensor<int32, [1]> var_1375_axes_0 = const()[name = string("op_1375_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270186368)))];
+            tensor<fp16, [1024]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270188480)))];
+            tensor<fp16, [1, ?, 1024]> var_1375_cast_fp16 = layer_norm(axes = var_1375_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1209_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1375_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1384_to_fp16 = const()[name = string("op_1384_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270190592)))];
+            tensor<fp16, [4096]> var_1385_to_fp16 = const()[name = string("op_1385_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278579264)))];
+            tensor<fp16, [1, ?, 4096]> linear_46_cast_fp16 = linear(bias = var_1385_to_fp16, weight = var_1384_to_fp16, x = var_1375_cast_fp16)[name = string("linear_46_cast_fp16")];
+            string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1390_to_fp16 = const()[name = string("op_1390_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278587520)))];
+            tensor<fp16, [1024]> var_1391_to_fp16 = const()[name = string("op_1391_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286976192)))];
+            tensor<fp16, [1, ?, 1024]> linear_47_cast_fp16 = linear(bias = var_1391_to_fp16, weight = var_1390_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")];
+            tensor<int32, [4]> k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor<int32, [4]>([7, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_58)[name = string("k_cache_25_cast_fp16")];
+            tensor<int32, [4]> v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor<int32, [4]>([7, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_59)[name = string("v_cache_25_cast_fp16")];
+            tensor<int32, [4]> k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor<int32, [4]>([7, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")];
+            tensor<int32, [4]> v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor<int32, [4]>([7, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")];
+            int32 var_1414 = const()[name = string("op_1414"), val = int32(-1)];
+            tensor<int32, [1]> var_1432_axes_0 = const()[name = string("op_1432_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286978304)))];
+            tensor<fp16, [1024]> blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286980416)))];
+            fp16 var_1420_to_fp16 = const()[name = string("op_1420_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_1432_cast_fp16 = layer_norm(axes = var_1432_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1420_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1432_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1443_to_fp16 = const()[name = string("op_1443_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286982528)))];
+            tensor<fp16, [1024]> var_1444_to_fp16 = const()[name = string("op_1444_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289079744)))];
+            tensor<fp16, [1, ?, 1024]> linear_48_cast_fp16 = linear(bias = var_1444_to_fp16, weight = var_1443_to_fp16, x = var_1432_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1447_to_fp16 = const()[name = string("op_1447_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289081856)))];
+            tensor<fp16, [1, ?, 1024]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1447_to_fp16, x = var_1432_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1451_to_fp16 = const()[name = string("op_1451_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291179072)))];
+            tensor<fp16, [1024]> var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293276288)))];
+            tensor<fp16, [1, ?, 1024]> linear_50_cast_fp16 = linear(bias = var_1452_to_fp16, weight = var_1451_to_fp16, x = var_1432_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [3]> var_1454_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1454_shape_cast_fp16")];
+            int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)];
+            int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)];
+            bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)];
+            string var_1454_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1454_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1454_shape_cast_fp16_to_uint16 = cast(dtype = var_1454_shape_cast_fp16_to_uint16_dtype_0, x = var_1454_shape_cast_fp16)[name = string("cast_282")];
+            uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1454_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")];
+            string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_281")];
+            int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")];
+            tensor<int32, [1]> expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")];
+            tensor<int32, [1]> concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor<int32, [1]>([6])];
+            int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)];
+            bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")];
+            tensor<int32, [1]> concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)];
+            bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_58)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_60_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_60 = read_state(input = k_cache1)[name = string("coreml_update_state_60")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_59)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_61_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_61 = read_state(input = v_cache1)[name = string("coreml_update_state_61")];
+            int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)];
+            int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(1024)];
+            int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)];
+            bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")];
+            tensor<int32, [3]> var_1470_begin_0 = const()[name = string("op_1470_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1470_end_mask_0 = const()[name = string("op_1470_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = concat_142, end_mask = var_1470_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1470_cast_fp16")];
+            tensor<int32, [3]> var_1473_begin_0 = const()[name = string("op_1473_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1473_end_mask_0 = const()[name = string("op_1473_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1473_cast_fp16 = slice_by_index(begin = var_1473_begin_0, end = concat_142, end_mask = var_1473_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1473_cast_fp16")];
+            tensor<int32, [4]> concat_144x = const()[name = string("concat_144x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1483_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1483_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_144_to_fp16 = const()[name = string("const_144_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_51_cast_fp16 = mul(x = var_1483_cast_fp16, y = const_144_to_fp16)[name = string("q_51_cast_fp16")];
+            tensor<int32, [4]> concat_145x = const()[name = string("concat_145x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1490_cast_fp16 = reshape(shape = concat_145x, x = var_1470_cast_fp16)[name = string("op_1490_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_145_to_fp16 = const()[name = string("const_145_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_65_cast_fp16 = mul(x = var_1490_cast_fp16, y = const_145_to_fp16)[name = string("k_65_cast_fp16")];
+            tensor<int32, [4]> concat_146x = const()[name = string("concat_146x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1497_cast_fp16 = reshape(shape = concat_146x, x = var_1473_cast_fp16)[name = string("op_1497_cast_fp16")];
+            tensor<int32, [4]> var_1498 = const()[name = string("op_1498"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)];
+            bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_217_perm_0 = const()[name = string("transpose_217_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_218_perm_0 = const()[name = string("transpose_218_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_218 = transpose(perm = transpose_218_perm_0, x = k_65_cast_fp16)[name = string("transpose_430")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_217 = transpose(perm = transpose_217_perm_0, x = q_51_cast_fp16)[name = string("transpose_431")];
+            tensor<fp16, [1, 16, ?, ?]> qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_217, y = transpose_218)[name = string("qk_37_cast_fp16")];
+            int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)];
+            int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)];
+            bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")];
+            tensor<int32, [2]> var_1501_begin_0 = const()[name = string("op_1501_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1501_end_mask_0 = const()[name = string("op_1501_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1501_cast_fp16 = slice_by_index(begin = var_1501_begin_0, end = concat_147, end_mask = var_1501_end_mask_0, x = mask_to_fp16)[name = string("op_1501_cast_fp16")];
+            int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)];
+            int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)];
+            bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")];
+            tensor<int32, [2]> var_1502_begin_0 = const()[name = string("op_1502_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1502_end_mask_0 = const()[name = string("op_1502_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = concat_148, end_mask = var_1502_end_mask_0, x = var_1501_cast_fp16)[name = string("op_1502_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1502_cast_fp16)[name = string("qk_39_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_1505_cast_fp16 = softmax(axis = var_1414, x = qk_39_cast_fp16)[name = string("op_1505_cast_fp16")];
+            bool var_1507_transpose_x_0 = const()[name = string("op_1507_transpose_x_0"), val = bool(false)];
+            bool var_1507_transpose_y_0 = const()[name = string("op_1507_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_65_cast_fp16 = transpose(perm = var_1498, x = var_1497_cast_fp16)[name = string("transpose_432")];
+            tensor<fp16, [1, 16, ?, 64]> var_1507_cast_fp16 = matmul(transpose_x = var_1507_transpose_x_0, transpose_y = var_1507_transpose_y_0, x = var_1505_cast_fp16, y = v_65_cast_fp16)[name = string("op_1507_cast_fp16")];
+            tensor<int32, [4]> var_1508 = const()[name = string("op_1508"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_149x = const()[name = string("concat_149x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1509_cast_fp16 = transpose(perm = var_1508, x = var_1507_cast_fp16)[name = string("transpose_429")];
+            tensor<fp16, [1, ?, 1024]> x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1509_cast_fp16)[name = string("x_115_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1513_to_fp16 = const()[name = string("op_1513_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293278400)))];
+            tensor<fp16, [1024]> var_1514_to_fp16 = const()[name = string("op_1514_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295375616)))];
+            tensor<fp16, [1, ?, 1024]> linear_51_cast_fp16 = linear(bias = var_1514_to_fp16, weight = var_1513_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")];
+            tensor<int32, [1]> var_1521_axes_0 = const()[name = string("op_1521_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295377728)))];
+            tensor<fp16, [1024]> blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295379840)))];
+            tensor<fp16, [1, ?, 1024]> var_1521_cast_fp16 = layer_norm(axes = var_1521_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1420_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1521_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295381952)))];
+            tensor<fp16, [1024]> var_1531_to_fp16 = const()[name = string("op_1531_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297479168)))];
+            tensor<fp16, [1, ?, 1024]> linear_52_cast_fp16 = linear(bias = var_1531_to_fp16, weight = var_1530_to_fp16, x = var_1521_cast_fp16)[name = string("linear_52_cast_fp16")];
+            tensor<int32, [3]> concat_150 = const()[name = string("concat_150"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_151 = const()[name = string("concat_151"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_152 = const()[name = string("concat_152"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_153 = const()[name = string("concat_153"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_154x = const()[name = string("concat_154x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1551_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1551_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_146_to_fp16 = const()[name = string("const_146_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_55_cast_fp16 = mul(x = var_1551_cast_fp16, y = const_146_to_fp16)[name = string("q_55_cast_fp16")];
+            tensor<int32, [4]> var_1557 = const()[name = string("op_1557"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1558_cast_fp16 = reshape(shape = var_1557, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1558_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_147_to_fp16 = const()[name = string("const_147_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_69_cast_fp16 = mul(x = var_1558_cast_fp16, y = const_147_to_fp16)[name = string("k_69_cast_fp16")];
+            tensor<int32, [4]> var_1564 = const()[name = string("op_1564"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1565_cast_fp16 = reshape(shape = var_1564, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1565_cast_fp16")];
+            tensor<int32, [4]> var_1566 = const()[name = string("op_1566"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)];
+            bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_219_perm_0 = const()[name = string("transpose_219_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_220_perm_0 = const()[name = string("transpose_220_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_220 = transpose(perm = transpose_220_perm_0, x = k_69_cast_fp16)[name = string("transpose_426")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_219 = transpose(perm = transpose_219_perm_0, x = q_55_cast_fp16)[name = string("transpose_427")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_219, y = transpose_220)[name = string("qk_41_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_1570_cast_fp16 = softmax(axis = var_1414, x = qk_41_cast_fp16)[name = string("op_1570_cast_fp16")];
+            bool var_1572_transpose_x_0 = const()[name = string("op_1572_transpose_x_0"), val = bool(false)];
+            bool var_1572_transpose_y_0 = const()[name = string("op_1572_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_69_cast_fp16 = transpose(perm = var_1566, x = var_1565_cast_fp16)[name = string("transpose_428")];
+            tensor<fp16, [1, 16, ?, 64]> var_1572_cast_fp16 = matmul(transpose_x = var_1572_transpose_x_0, transpose_y = var_1572_transpose_y_0, x = var_1570_cast_fp16, y = v_69_cast_fp16)[name = string("op_1572_cast_fp16")];
+            tensor<int32, [4]> var_1573 = const()[name = string("op_1573"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_155x = const()[name = string("concat_155x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1574_cast_fp16 = transpose(perm = var_1573, x = var_1572_cast_fp16)[name = string("transpose_425")];
+            tensor<fp16, [1, ?, 1024]> x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1574_cast_fp16)[name = string("x_121_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1578_to_fp16 = const()[name = string("op_1578_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297481280)))];
+            tensor<fp16, [1024]> var_1579_to_fp16 = const()[name = string("op_1579_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299578496)))];
+            tensor<fp16, [1, ?, 1024]> linear_53_cast_fp16 = linear(bias = var_1579_to_fp16, weight = var_1578_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")];
+            tensor<int32, [1]> var_1586_axes_0 = const()[name = string("op_1586_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299580608)))];
+            tensor<fp16, [1024]> blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299582720)))];
+            tensor<fp16, [1, ?, 1024]> var_1586_cast_fp16 = layer_norm(axes = var_1586_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1420_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1586_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1595_to_fp16 = const()[name = string("op_1595_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299584832)))];
+            tensor<fp16, [4096]> var_1596_to_fp16 = const()[name = string("op_1596_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307973504)))];
+            tensor<fp16, [1, ?, 4096]> linear_54_cast_fp16 = linear(bias = var_1596_to_fp16, weight = var_1595_to_fp16, x = var_1586_cast_fp16)[name = string("linear_54_cast_fp16")];
+            string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1601_to_fp16 = const()[name = string("op_1601_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307981760)))];
+            tensor<fp16, [1024]> var_1602_to_fp16 = const()[name = string("op_1602_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316370432)))];
+            tensor<fp16, [1, ?, 1024]> linear_55_cast_fp16 = linear(bias = var_1602_to_fp16, weight = var_1601_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")];
+            tensor<int32, [4]> k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor<int32, [4]>([8, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_60)[name = string("k_cache_29_cast_fp16")];
+            tensor<int32, [4]> v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor<int32, [4]>([8, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_61)[name = string("v_cache_29_cast_fp16")];
+            tensor<int32, [4]> k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor<int32, [4]>([8, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")];
+            tensor<int32, [4]> v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor<int32, [4]>([8, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")];
+            int32 var_1625 = const()[name = string("op_1625"), val = int32(-1)];
+            tensor<int32, [1]> var_1643_axes_0 = const()[name = string("op_1643_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316372544)))];
+            tensor<fp16, [1024]> blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316374656)))];
+            fp16 var_1631_to_fp16 = const()[name = string("op_1631_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_1643_cast_fp16 = layer_norm(axes = var_1643_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1631_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1643_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1654_to_fp16 = const()[name = string("op_1654_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316376768)))];
+            tensor<fp16, [1024]> var_1655_to_fp16 = const()[name = string("op_1655_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318473984)))];
+            tensor<fp16, [1, ?, 1024]> linear_56_cast_fp16 = linear(bias = var_1655_to_fp16, weight = var_1654_to_fp16, x = var_1643_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1658_to_fp16 = const()[name = string("op_1658_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318476096)))];
+            tensor<fp16, [1, ?, 1024]> linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1658_to_fp16, x = var_1643_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1662_to_fp16 = const()[name = string("op_1662_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320573312)))];
+            tensor<fp16, [1024]> var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322670528)))];
+            tensor<fp16, [1, ?, 1024]> linear_58_cast_fp16 = linear(bias = var_1663_to_fp16, weight = var_1662_to_fp16, x = var_1643_cast_fp16)[name = string("linear_58_cast_fp16")];
+            tensor<int32, [3]> var_1665_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1665_shape_cast_fp16")];
+            int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)];
+            int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)];
+            bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)];
+            string var_1665_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1665_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1665_shape_cast_fp16_to_uint16 = cast(dtype = var_1665_shape_cast_fp16_to_uint16_dtype_0, x = var_1665_shape_cast_fp16)[name = string("cast_280")];
+            uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1665_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")];
+            string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_279")];
+            int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")];
+            tensor<int32, [1]> expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")];
+            tensor<int32, [1]> concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor<int32, [1]>([7])];
+            int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)];
+            bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")];
+            tensor<int32, [1]> concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)];
+            bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_60)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_62_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_62 = read_state(input = k_cache1)[name = string("coreml_update_state_62")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_61)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_63_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_63 = read_state(input = v_cache1)[name = string("coreml_update_state_63")];
+            int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)];
+            int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(1024)];
+            int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)];
+            bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")];
+            tensor<int32, [3]> var_1681_begin_0 = const()[name = string("op_1681_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1681_end_mask_0 = const()[name = string("op_1681_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1681_cast_fp16 = slice_by_index(begin = var_1681_begin_0, end = concat_164, end_mask = var_1681_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1681_cast_fp16")];
+            tensor<int32, [3]> var_1684_begin_0 = const()[name = string("op_1684_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1684_end_mask_0 = const()[name = string("op_1684_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1684_cast_fp16 = slice_by_index(begin = var_1684_begin_0, end = concat_164, end_mask = var_1684_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1684_cast_fp16")];
+            tensor<int32, [4]> concat_166x = const()[name = string("concat_166x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1694_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1694_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_148_to_fp16 = const()[name = string("const_148_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_59_cast_fp16 = mul(x = var_1694_cast_fp16, y = const_148_to_fp16)[name = string("q_59_cast_fp16")];
+            tensor<int32, [4]> concat_167x = const()[name = string("concat_167x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1701_cast_fp16 = reshape(shape = concat_167x, x = var_1681_cast_fp16)[name = string("op_1701_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_149_to_fp16 = const()[name = string("const_149_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_75_cast_fp16 = mul(x = var_1701_cast_fp16, y = const_149_to_fp16)[name = string("k_75_cast_fp16")];
+            tensor<int32, [4]> concat_168x = const()[name = string("concat_168x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1708_cast_fp16 = reshape(shape = concat_168x, x = var_1684_cast_fp16)[name = string("op_1708_cast_fp16")];
+            tensor<int32, [4]> var_1709 = const()[name = string("op_1709"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)];
+            bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_221_perm_0 = const()[name = string("transpose_221_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_222_perm_0 = const()[name = string("transpose_222_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_222 = transpose(perm = transpose_222_perm_0, x = k_75_cast_fp16)[name = string("transpose_422")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_221 = transpose(perm = transpose_221_perm_0, x = q_59_cast_fp16)[name = string("transpose_423")];
+            tensor<fp16, [1, 16, ?, ?]> qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_221, y = transpose_222)[name = string("qk_43_cast_fp16")];
+            int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)];
+            int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)];
+            bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")];
+            tensor<int32, [2]> var_1712_begin_0 = const()[name = string("op_1712_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1712_end_mask_0 = const()[name = string("op_1712_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1712_cast_fp16 = slice_by_index(begin = var_1712_begin_0, end = concat_169, end_mask = var_1712_end_mask_0, x = mask_to_fp16)[name = string("op_1712_cast_fp16")];
+            int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)];
+            int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)];
+            bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")];
+            tensor<int32, [2]> var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = concat_170, end_mask = var_1713_end_mask_0, x = var_1712_cast_fp16)[name = string("op_1713_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1713_cast_fp16)[name = string("qk_45_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_1716_cast_fp16 = softmax(axis = var_1625, x = qk_45_cast_fp16)[name = string("op_1716_cast_fp16")];
+            bool var_1718_transpose_x_0 = const()[name = string("op_1718_transpose_x_0"), val = bool(false)];
+            bool var_1718_transpose_y_0 = const()[name = string("op_1718_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_75_cast_fp16 = transpose(perm = var_1709, x = var_1708_cast_fp16)[name = string("transpose_424")];
+            tensor<fp16, [1, 16, ?, 64]> var_1718_cast_fp16 = matmul(transpose_x = var_1718_transpose_x_0, transpose_y = var_1718_transpose_y_0, x = var_1716_cast_fp16, y = v_75_cast_fp16)[name = string("op_1718_cast_fp16")];
+            tensor<int32, [4]> var_1719 = const()[name = string("op_1719"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_171x = const()[name = string("concat_171x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1720_cast_fp16 = transpose(perm = var_1719, x = var_1718_cast_fp16)[name = string("transpose_421")];
+            tensor<fp16, [1, ?, 1024]> x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1720_cast_fp16)[name = string("x_133_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1724_to_fp16 = const()[name = string("op_1724_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322672640)))];
+            tensor<fp16, [1024]> var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324769856)))];
+            tensor<fp16, [1, ?, 1024]> linear_59_cast_fp16 = linear(bias = var_1725_to_fp16, weight = var_1724_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")];
+            tensor<int32, [1]> var_1732_axes_0 = const()[name = string("op_1732_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324771968)))];
+            tensor<fp16, [1024]> blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324774080)))];
+            tensor<fp16, [1, ?, 1024]> var_1732_cast_fp16 = layer_norm(axes = var_1732_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1631_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1732_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324776192)))];
+            tensor<fp16, [1024]> var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326873408)))];
+            tensor<fp16, [1, ?, 1024]> linear_60_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = var_1732_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<int32, [3]> concat_172 = const()[name = string("concat_172"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_173 = const()[name = string("concat_173"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_174 = const()[name = string("concat_174"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_175 = const()[name = string("concat_175"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_176x = const()[name = string("concat_176x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1762_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1762_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_150_to_fp16 = const()[name = string("const_150_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_63_cast_fp16 = mul(x = var_1762_cast_fp16, y = const_150_to_fp16)[name = string("q_63_cast_fp16")];
+            tensor<int32, [4]> var_1768 = const()[name = string("op_1768"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1769_cast_fp16 = reshape(shape = var_1768, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1769_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_151_to_fp16 = const()[name = string("const_151_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_79_cast_fp16 = mul(x = var_1769_cast_fp16, y = const_151_to_fp16)[name = string("k_79_cast_fp16")];
+            tensor<int32, [4]> var_1775 = const()[name = string("op_1775"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1776_cast_fp16 = reshape(shape = var_1775, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1776_cast_fp16")];
+            tensor<int32, [4]> var_1777 = const()[name = string("op_1777"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)];
+            bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_223_perm_0 = const()[name = string("transpose_223_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_224_perm_0 = const()[name = string("transpose_224_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_224 = transpose(perm = transpose_224_perm_0, x = k_79_cast_fp16)[name = string("transpose_418")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_223 = transpose(perm = transpose_223_perm_0, x = q_63_cast_fp16)[name = string("transpose_419")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_223, y = transpose_224)[name = string("qk_47_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_1781_cast_fp16 = softmax(axis = var_1625, x = qk_47_cast_fp16)[name = string("op_1781_cast_fp16")];
+            bool var_1783_transpose_x_0 = const()[name = string("op_1783_transpose_x_0"), val = bool(false)];
+            bool var_1783_transpose_y_0 = const()[name = string("op_1783_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_79_cast_fp16 = transpose(perm = var_1777, x = var_1776_cast_fp16)[name = string("transpose_420")];
+            tensor<fp16, [1, 16, ?, 64]> var_1783_cast_fp16 = matmul(transpose_x = var_1783_transpose_x_0, transpose_y = var_1783_transpose_y_0, x = var_1781_cast_fp16, y = v_79_cast_fp16)[name = string("op_1783_cast_fp16")];
+            tensor<int32, [4]> var_1784 = const()[name = string("op_1784"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_177x = const()[name = string("concat_177x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1785_cast_fp16 = transpose(perm = var_1784, x = var_1783_cast_fp16)[name = string("transpose_417")];
+            tensor<fp16, [1, ?, 1024]> x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1785_cast_fp16)[name = string("x_139_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1789_to_fp16 = const()[name = string("op_1789_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326875520)))];
+            tensor<fp16, [1024]> var_1790_to_fp16 = const()[name = string("op_1790_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328972736)))];
+            tensor<fp16, [1, ?, 1024]> linear_61_cast_fp16 = linear(bias = var_1790_to_fp16, weight = var_1789_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")];
+            tensor<int32, [1]> var_1797_axes_0 = const()[name = string("op_1797_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328974848)))];
+            tensor<fp16, [1024]> blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328976960)))];
+            tensor<fp16, [1, ?, 1024]> var_1797_cast_fp16 = layer_norm(axes = var_1797_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1631_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1797_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1806_to_fp16 = const()[name = string("op_1806_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328979072)))];
+            tensor<fp16, [4096]> var_1807_to_fp16 = const()[name = string("op_1807_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337367744)))];
+            tensor<fp16, [1, ?, 4096]> linear_62_cast_fp16 = linear(bias = var_1807_to_fp16, weight = var_1806_to_fp16, x = var_1797_cast_fp16)[name = string("linear_62_cast_fp16")];
+            string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1812_to_fp16 = const()[name = string("op_1812_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337376000)))];
+            tensor<fp16, [1024]> var_1813_to_fp16 = const()[name = string("op_1813_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345764672)))];
+            tensor<fp16, [1, ?, 1024]> linear_63_cast_fp16 = linear(bias = var_1813_to_fp16, weight = var_1812_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")];
+            tensor<int32, [4]> k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor<int32, [4]>([9, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_62)[name = string("k_cache_33_cast_fp16")];
+            tensor<int32, [4]> v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor<int32, [4]>([9, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_63)[name = string("v_cache_33_cast_fp16")];
+            tensor<int32, [4]> k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor<int32, [4]>([9, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")];
+            tensor<int32, [4]> v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor<int32, [4]>([9, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")];
+            int32 var_1836 = const()[name = string("op_1836"), val = int32(-1)];
+            tensor<int32, [1]> var_1854_axes_0 = const()[name = string("op_1854_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345766784)))];
+            tensor<fp16, [1024]> blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345768896)))];
+            fp16 var_1842_to_fp16 = const()[name = string("op_1842_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_1854_cast_fp16 = layer_norm(axes = var_1854_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1842_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1854_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1865_to_fp16 = const()[name = string("op_1865_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345771008)))];
+            tensor<fp16, [1024]> var_1866_to_fp16 = const()[name = string("op_1866_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347868224)))];
+            tensor<fp16, [1, ?, 1024]> linear_64_cast_fp16 = linear(bias = var_1866_to_fp16, weight = var_1865_to_fp16, x = var_1854_cast_fp16)[name = string("linear_64_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1869_to_fp16 = const()[name = string("op_1869_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347870336)))];
+            tensor<fp16, [1, ?, 1024]> linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1869_to_fp16, x = var_1854_cast_fp16)[name = string("linear_65_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1873_to_fp16 = const()[name = string("op_1873_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349967552)))];
+            tensor<fp16, [1024]> var_1874_to_fp16 = const()[name = string("op_1874_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352064768)))];
+            tensor<fp16, [1, ?, 1024]> linear_66_cast_fp16 = linear(bias = var_1874_to_fp16, weight = var_1873_to_fp16, x = var_1854_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<int32, [3]> var_1876_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1876_shape_cast_fp16")];
+            int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)];
+            int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)];
+            bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)];
+            string var_1876_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1876_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1876_shape_cast_fp16_to_uint16 = cast(dtype = var_1876_shape_cast_fp16_to_uint16_dtype_0, x = var_1876_shape_cast_fp16)[name = string("cast_278")];
+            uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1876_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")];
+            string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_277")];
+            int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")];
+            tensor<int32, [1]> expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")];
+            tensor<int32, [1]> concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor<int32, [1]>([8])];
+            int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)];
+            bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")];
+            tensor<int32, [1]> concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)];
+            bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_62)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_64_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_64 = read_state(input = k_cache1)[name = string("coreml_update_state_64")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_63)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_65_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_65 = read_state(input = v_cache1)[name = string("coreml_update_state_65")];
+            int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)];
+            int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(1024)];
+            int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)];
+            bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")];
+            tensor<int32, [3]> var_1892_begin_0 = const()[name = string("op_1892_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1892_end_mask_0 = const()[name = string("op_1892_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1892_cast_fp16 = slice_by_index(begin = var_1892_begin_0, end = concat_186, end_mask = var_1892_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1892_cast_fp16")];
+            tensor<int32, [3]> var_1895_begin_0 = const()[name = string("op_1895_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1895_end_mask_0 = const()[name = string("op_1895_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_1895_cast_fp16 = slice_by_index(begin = var_1895_begin_0, end = concat_186, end_mask = var_1895_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1895_cast_fp16")];
+            tensor<int32, [4]> concat_188x = const()[name = string("concat_188x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1905_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1905_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_152_to_fp16 = const()[name = string("const_152_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_67_cast_fp16 = mul(x = var_1905_cast_fp16, y = const_152_to_fp16)[name = string("q_67_cast_fp16")];
+            tensor<int32, [4]> concat_189x = const()[name = string("concat_189x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1912_cast_fp16 = reshape(shape = concat_189x, x = var_1892_cast_fp16)[name = string("op_1912_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_153_to_fp16 = const()[name = string("const_153_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_85_cast_fp16 = mul(x = var_1912_cast_fp16, y = const_153_to_fp16)[name = string("k_85_cast_fp16")];
+            tensor<int32, [4]> concat_190x = const()[name = string("concat_190x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1919_cast_fp16 = reshape(shape = concat_190x, x = var_1895_cast_fp16)[name = string("op_1919_cast_fp16")];
+            tensor<int32, [4]> var_1920 = const()[name = string("op_1920"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)];
+            bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_225_perm_0 = const()[name = string("transpose_225_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_226_perm_0 = const()[name = string("transpose_226_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_226 = transpose(perm = transpose_226_perm_0, x = k_85_cast_fp16)[name = string("transpose_414")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_225 = transpose(perm = transpose_225_perm_0, x = q_67_cast_fp16)[name = string("transpose_415")];
+            tensor<fp16, [1, 16, ?, ?]> qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_225, y = transpose_226)[name = string("qk_49_cast_fp16")];
+            int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)];
+            int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)];
+            bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")];
+            tensor<int32, [2]> var_1923_begin_0 = const()[name = string("op_1923_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1923_end_mask_0 = const()[name = string("op_1923_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1923_cast_fp16 = slice_by_index(begin = var_1923_begin_0, end = concat_191, end_mask = var_1923_end_mask_0, x = mask_to_fp16)[name = string("op_1923_cast_fp16")];
+            int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)];
+            int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)];
+            bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")];
+            tensor<int32, [2]> var_1924_begin_0 = const()[name = string("op_1924_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1924_end_mask_0 = const()[name = string("op_1924_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = concat_192, end_mask = var_1924_end_mask_0, x = var_1923_cast_fp16)[name = string("op_1924_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1924_cast_fp16)[name = string("qk_51_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_1927_cast_fp16 = softmax(axis = var_1836, x = qk_51_cast_fp16)[name = string("op_1927_cast_fp16")];
+            bool var_1929_transpose_x_0 = const()[name = string("op_1929_transpose_x_0"), val = bool(false)];
+            bool var_1929_transpose_y_0 = const()[name = string("op_1929_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_85_cast_fp16 = transpose(perm = var_1920, x = var_1919_cast_fp16)[name = string("transpose_416")];
+            tensor<fp16, [1, 16, ?, 64]> var_1929_cast_fp16 = matmul(transpose_x = var_1929_transpose_x_0, transpose_y = var_1929_transpose_y_0, x = var_1927_cast_fp16, y = v_85_cast_fp16)[name = string("op_1929_cast_fp16")];
+            tensor<int32, [4]> var_1930 = const()[name = string("op_1930"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_193x = const()[name = string("concat_193x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1931_cast_fp16 = transpose(perm = var_1930, x = var_1929_cast_fp16)[name = string("transpose_413")];
+            tensor<fp16, [1, ?, 1024]> x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1931_cast_fp16)[name = string("x_151_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1935_to_fp16 = const()[name = string("op_1935_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352066880)))];
+            tensor<fp16, [1024]> var_1936_to_fp16 = const()[name = string("op_1936_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354164096)))];
+            tensor<fp16, [1, ?, 1024]> linear_67_cast_fp16 = linear(bias = var_1936_to_fp16, weight = var_1935_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")];
+            tensor<int32, [1]> var_1943_axes_0 = const()[name = string("op_1943_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354166208)))];
+            tensor<fp16, [1024]> blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354168320)))];
+            tensor<fp16, [1, ?, 1024]> var_1943_cast_fp16 = layer_norm(axes = var_1943_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1842_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1943_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1952_to_fp16 = const()[name = string("op_1952_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354170432)))];
+            tensor<fp16, [1024]> var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356267648)))];
+            tensor<fp16, [1, ?, 1024]> linear_68_cast_fp16 = linear(bias = var_1953_to_fp16, weight = var_1952_to_fp16, x = var_1943_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [3]> concat_194 = const()[name = string("concat_194"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_195 = const()[name = string("concat_195"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_196 = const()[name = string("concat_196"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_197 = const()[name = string("concat_197"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_198x = const()[name = string("concat_198x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_1973_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_1973_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_154_to_fp16 = const()[name = string("const_154_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_71_cast_fp16 = mul(x = var_1973_cast_fp16, y = const_154_to_fp16)[name = string("q_71_cast_fp16")];
+            tensor<int32, [4]> var_1979 = const()[name = string("op_1979"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1980_cast_fp16 = reshape(shape = var_1979, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1980_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_155_to_fp16 = const()[name = string("const_155_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_89_cast_fp16 = mul(x = var_1980_cast_fp16, y = const_155_to_fp16)[name = string("k_89_cast_fp16")];
+            tensor<int32, [4]> var_1986 = const()[name = string("op_1986"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1987_cast_fp16 = reshape(shape = var_1986, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1987_cast_fp16")];
+            tensor<int32, [4]> var_1988 = const()[name = string("op_1988"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)];
+            bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_227_perm_0 = const()[name = string("transpose_227_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_228_perm_0 = const()[name = string("transpose_228_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_228 = transpose(perm = transpose_228_perm_0, x = k_89_cast_fp16)[name = string("transpose_410")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_227 = transpose(perm = transpose_227_perm_0, x = q_71_cast_fp16)[name = string("transpose_411")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_227, y = transpose_228)[name = string("qk_53_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_1992_cast_fp16 = softmax(axis = var_1836, x = qk_53_cast_fp16)[name = string("op_1992_cast_fp16")];
+            bool var_1994_transpose_x_0 = const()[name = string("op_1994_transpose_x_0"), val = bool(false)];
+            bool var_1994_transpose_y_0 = const()[name = string("op_1994_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_89_cast_fp16 = transpose(perm = var_1988, x = var_1987_cast_fp16)[name = string("transpose_412")];
+            tensor<fp16, [1, 16, ?, 64]> var_1994_cast_fp16 = matmul(transpose_x = var_1994_transpose_x_0, transpose_y = var_1994_transpose_y_0, x = var_1992_cast_fp16, y = v_89_cast_fp16)[name = string("op_1994_cast_fp16")];
+            tensor<int32, [4]> var_1995 = const()[name = string("op_1995"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_199x = const()[name = string("concat_199x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_1996_cast_fp16 = transpose(perm = var_1995, x = var_1994_cast_fp16)[name = string("transpose_409")];
+            tensor<fp16, [1, ?, 1024]> x_157_cast_fp16 = reshape(shape = concat_199x, x = var_1996_cast_fp16)[name = string("x_157_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2000_to_fp16 = const()[name = string("op_2000_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356269760)))];
+            tensor<fp16, [1024]> var_2001_to_fp16 = const()[name = string("op_2001_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358366976)))];
+            tensor<fp16, [1, ?, 1024]> linear_69_cast_fp16 = linear(bias = var_2001_to_fp16, weight = var_2000_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")];
+            tensor<int32, [1]> var_2008_axes_0 = const()[name = string("op_2008_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358369088)))];
+            tensor<fp16, [1024]> blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358371200)))];
+            tensor<fp16, [1, ?, 1024]> var_2008_cast_fp16 = layer_norm(axes = var_2008_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1842_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_2008_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2017_to_fp16 = const()[name = string("op_2017_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358373312)))];
+            tensor<fp16, [4096]> var_2018_to_fp16 = const()[name = string("op_2018_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366761984)))];
+            tensor<fp16, [1, ?, 4096]> linear_70_cast_fp16 = linear(bias = var_2018_to_fp16, weight = var_2017_to_fp16, x = var_2008_cast_fp16)[name = string("linear_70_cast_fp16")];
+            string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2023_to_fp16 = const()[name = string("op_2023_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366770240)))];
+            tensor<fp16, [1024]> var_2024_to_fp16 = const()[name = string("op_2024_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375158912)))];
+            tensor<fp16, [1, ?, 1024]> linear_71_cast_fp16 = linear(bias = var_2024_to_fp16, weight = var_2023_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")];
+            tensor<int32, [4]> k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor<int32, [4]>([10, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_64)[name = string("k_cache_37_cast_fp16")];
+            tensor<int32, [4]> v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor<int32, [4]>([10, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_65)[name = string("v_cache_37_cast_fp16")];
+            tensor<int32, [4]> k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor<int32, [4]>([10, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")];
+            tensor<int32, [4]> v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor<int32, [4]>([10, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")];
+            int32 var_2047 = const()[name = string("op_2047"), val = int32(-1)];
+            tensor<int32, [1]> var_2065_axes_0 = const()[name = string("op_2065_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375161024)))];
+            tensor<fp16, [1024]> blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375163136)))];
+            fp16 var_2053_to_fp16 = const()[name = string("op_2053_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_2065_cast_fp16 = layer_norm(axes = var_2065_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2053_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2065_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2076_to_fp16 = const()[name = string("op_2076_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375165248)))];
+            tensor<fp16, [1024]> var_2077_to_fp16 = const()[name = string("op_2077_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377262464)))];
+            tensor<fp16, [1, ?, 1024]> linear_72_cast_fp16 = linear(bias = var_2077_to_fp16, weight = var_2076_to_fp16, x = var_2065_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2080_to_fp16 = const()[name = string("op_2080_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377264576)))];
+            tensor<fp16, [1, ?, 1024]> linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2080_to_fp16, x = var_2065_cast_fp16)[name = string("linear_73_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2084_to_fp16 = const()[name = string("op_2084_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379361792)))];
+            tensor<fp16, [1024]> var_2085_to_fp16 = const()[name = string("op_2085_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381459008)))];
+            tensor<fp16, [1, ?, 1024]> linear_74_cast_fp16 = linear(bias = var_2085_to_fp16, weight = var_2084_to_fp16, x = var_2065_cast_fp16)[name = string("linear_74_cast_fp16")];
+            tensor<int32, [3]> var_2087_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2087_shape_cast_fp16")];
+            int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)];
+            int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)];
+            bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)];
+            string var_2087_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2087_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2087_shape_cast_fp16_to_uint16 = cast(dtype = var_2087_shape_cast_fp16_to_uint16_dtype_0, x = var_2087_shape_cast_fp16)[name = string("cast_276")];
+            uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2087_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")];
+            string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_275")];
+            int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")];
+            tensor<int32, [1]> expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")];
+            tensor<int32, [1]> concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor<int32, [1]>([9])];
+            int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)];
+            bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")];
+            tensor<int32, [1]> concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)];
+            bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_64)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_66 = read_state(input = k_cache1)[name = string("coreml_update_state_66")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_65)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_67 = read_state(input = v_cache1)[name = string("coreml_update_state_67")];
+            int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)];
+            int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(1024)];
+            int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)];
+            bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")];
+            tensor<int32, [3]> var_2103_begin_0 = const()[name = string("op_2103_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2103_end_mask_0 = const()[name = string("op_2103_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2103_cast_fp16 = slice_by_index(begin = var_2103_begin_0, end = concat_208, end_mask = var_2103_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2103_cast_fp16")];
+            tensor<int32, [3]> var_2106_begin_0 = const()[name = string("op_2106_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2106_end_mask_0 = const()[name = string("op_2106_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = concat_208, end_mask = var_2106_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2106_cast_fp16")];
+            tensor<int32, [4]> concat_210x = const()[name = string("concat_210x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2116_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2116_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_156_to_fp16 = const()[name = string("const_156_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_75_cast_fp16 = mul(x = var_2116_cast_fp16, y = const_156_to_fp16)[name = string("q_75_cast_fp16")];
+            tensor<int32, [4]> concat_211x = const()[name = string("concat_211x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2123_cast_fp16 = reshape(shape = concat_211x, x = var_2103_cast_fp16)[name = string("op_2123_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_157_to_fp16 = const()[name = string("const_157_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_95_cast_fp16 = mul(x = var_2123_cast_fp16, y = const_157_to_fp16)[name = string("k_95_cast_fp16")];
+            tensor<int32, [4]> concat_212x = const()[name = string("concat_212x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2130_cast_fp16 = reshape(shape = concat_212x, x = var_2106_cast_fp16)[name = string("op_2130_cast_fp16")];
+            tensor<int32, [4]> var_2131 = const()[name = string("op_2131"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)];
+            bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_229_perm_0 = const()[name = string("transpose_229_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_230_perm_0 = const()[name = string("transpose_230_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_230 = transpose(perm = transpose_230_perm_0, x = k_95_cast_fp16)[name = string("transpose_406")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_229 = transpose(perm = transpose_229_perm_0, x = q_75_cast_fp16)[name = string("transpose_407")];
+            tensor<fp16, [1, 16, ?, ?]> qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_229, y = transpose_230)[name = string("qk_55_cast_fp16")];
+            int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)];
+            int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)];
+            bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")];
+            tensor<int32, [2]> var_2134_begin_0 = const()[name = string("op_2134_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2134_end_mask_0 = const()[name = string("op_2134_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = concat_213, end_mask = var_2134_end_mask_0, x = mask_to_fp16)[name = string("op_2134_cast_fp16")];
+            int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)];
+            int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)];
+            bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")];
+            tensor<int32, [2]> var_2135_begin_0 = const()[name = string("op_2135_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2135_end_mask_0 = const()[name = string("op_2135_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2135_cast_fp16 = slice_by_index(begin = var_2135_begin_0, end = concat_214, end_mask = var_2135_end_mask_0, x = var_2134_cast_fp16)[name = string("op_2135_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2135_cast_fp16)[name = string("qk_57_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_2138_cast_fp16 = softmax(axis = var_2047, x = qk_57_cast_fp16)[name = string("op_2138_cast_fp16")];
+            bool var_2140_transpose_x_0 = const()[name = string("op_2140_transpose_x_0"), val = bool(false)];
+            bool var_2140_transpose_y_0 = const()[name = string("op_2140_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_95_cast_fp16 = transpose(perm = var_2131, x = var_2130_cast_fp16)[name = string("transpose_408")];
+            tensor<fp16, [1, 16, ?, 64]> var_2140_cast_fp16 = matmul(transpose_x = var_2140_transpose_x_0, transpose_y = var_2140_transpose_y_0, x = var_2138_cast_fp16, y = v_95_cast_fp16)[name = string("op_2140_cast_fp16")];
+            tensor<int32, [4]> var_2141 = const()[name = string("op_2141"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_215x = const()[name = string("concat_215x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_2142_cast_fp16 = transpose(perm = var_2141, x = var_2140_cast_fp16)[name = string("transpose_405")];
+            tensor<fp16, [1, ?, 1024]> x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2142_cast_fp16)[name = string("x_169_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2146_to_fp16 = const()[name = string("op_2146_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381461120)))];
+            tensor<fp16, [1024]> var_2147_to_fp16 = const()[name = string("op_2147_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383558336)))];
+            tensor<fp16, [1, ?, 1024]> linear_75_cast_fp16 = linear(bias = var_2147_to_fp16, weight = var_2146_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")];
+            tensor<int32, [1]> var_2154_axes_0 = const()[name = string("op_2154_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383560448)))];
+            tensor<fp16, [1024]> blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383562560)))];
+            tensor<fp16, [1, ?, 1024]> var_2154_cast_fp16 = layer_norm(axes = var_2154_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2053_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2154_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2163_to_fp16 = const()[name = string("op_2163_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383564672)))];
+            tensor<fp16, [1024]> var_2164_to_fp16 = const()[name = string("op_2164_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385661888)))];
+            tensor<fp16, [1, ?, 1024]> linear_76_cast_fp16 = linear(bias = var_2164_to_fp16, weight = var_2163_to_fp16, x = var_2154_cast_fp16)[name = string("linear_76_cast_fp16")];
+            tensor<int32, [3]> concat_216 = const()[name = string("concat_216"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_217 = const()[name = string("concat_217"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_218 = const()[name = string("concat_218"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_219 = const()[name = string("concat_219"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_220x = const()[name = string("concat_220x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2184_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2184_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_158_to_fp16 = const()[name = string("const_158_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_79_cast_fp16 = mul(x = var_2184_cast_fp16, y = const_158_to_fp16)[name = string("q_79_cast_fp16")];
+            tensor<int32, [4]> var_2190 = const()[name = string("op_2190"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2191_cast_fp16 = reshape(shape = var_2190, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2191_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_159_to_fp16 = const()[name = string("const_159_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_99_cast_fp16 = mul(x = var_2191_cast_fp16, y = const_159_to_fp16)[name = string("k_99_cast_fp16")];
+            tensor<int32, [4]> var_2197 = const()[name = string("op_2197"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2198_cast_fp16 = reshape(shape = var_2197, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2198_cast_fp16")];
+            tensor<int32, [4]> var_2199 = const()[name = string("op_2199"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)];
+            bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_231_perm_0 = const()[name = string("transpose_231_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_232_perm_0 = const()[name = string("transpose_232_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_232 = transpose(perm = transpose_232_perm_0, x = k_99_cast_fp16)[name = string("transpose_402")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_231 = transpose(perm = transpose_231_perm_0, x = q_79_cast_fp16)[name = string("transpose_403")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_231, y = transpose_232)[name = string("qk_59_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_2203_cast_fp16 = softmax(axis = var_2047, x = qk_59_cast_fp16)[name = string("op_2203_cast_fp16")];
+            bool var_2205_transpose_x_0 = const()[name = string("op_2205_transpose_x_0"), val = bool(false)];
+            bool var_2205_transpose_y_0 = const()[name = string("op_2205_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_99_cast_fp16 = transpose(perm = var_2199, x = var_2198_cast_fp16)[name = string("transpose_404")];
+            tensor<fp16, [1, 16, ?, 64]> var_2205_cast_fp16 = matmul(transpose_x = var_2205_transpose_x_0, transpose_y = var_2205_transpose_y_0, x = var_2203_cast_fp16, y = v_99_cast_fp16)[name = string("op_2205_cast_fp16")];
+            tensor<int32, [4]> var_2206 = const()[name = string("op_2206"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_221x = const()[name = string("concat_221x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_2207_cast_fp16 = transpose(perm = var_2206, x = var_2205_cast_fp16)[name = string("transpose_401")];
+            tensor<fp16, [1, ?, 1024]> x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2207_cast_fp16)[name = string("x_175_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385664000)))];
+            tensor<fp16, [1024]> var_2212_to_fp16 = const()[name = string("op_2212_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387761216)))];
+            tensor<fp16, [1, ?, 1024]> linear_77_cast_fp16 = linear(bias = var_2212_to_fp16, weight = var_2211_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")];
+            tensor<int32, [1]> var_2219_axes_0 = const()[name = string("op_2219_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387763328)))];
+            tensor<fp16, [1024]> blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387765440)))];
+            tensor<fp16, [1, ?, 1024]> var_2219_cast_fp16 = layer_norm(axes = var_2219_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2053_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2219_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387767552)))];
+            tensor<fp16, [4096]> var_2229_to_fp16 = const()[name = string("op_2229_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396156224)))];
+            tensor<fp16, [1, ?, 4096]> linear_78_cast_fp16 = linear(bias = var_2229_to_fp16, weight = var_2228_to_fp16, x = var_2219_cast_fp16)[name = string("linear_78_cast_fp16")];
+            string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396164480)))];
+            tensor<fp16, [1024]> var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404553152)))];
+            tensor<fp16, [1, ?, 1024]> linear_79_cast_fp16 = linear(bias = var_2235_to_fp16, weight = var_2234_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")];
+            tensor<int32, [4]> k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor<int32, [4]>([11, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_66)[name = string("k_cache_41_cast_fp16")];
+            tensor<int32, [4]> v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor<int32, [4]>([11, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_67)[name = string("v_cache_41_cast_fp16")];
+            tensor<int32, [4]> k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor<int32, [4]>([11, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")];
+            tensor<int32, [4]> v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor<int32, [4]>([11, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")];
+            int32 var_2258 = const()[name = string("op_2258"), val = int32(-1)];
+            tensor<int32, [1]> var_2276_axes_0 = const()[name = string("op_2276_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404555264)))];
+            tensor<fp16, [1024]> blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404557376)))];
+            fp16 var_2264_to_fp16 = const()[name = string("op_2264_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_2276_cast_fp16 = layer_norm(axes = var_2276_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2264_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2276_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2287_to_fp16 = const()[name = string("op_2287_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404559488)))];
+            tensor<fp16, [1024]> var_2288_to_fp16 = const()[name = string("op_2288_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406656704)))];
+            tensor<fp16, [1, ?, 1024]> linear_80_cast_fp16 = linear(bias = var_2288_to_fp16, weight = var_2287_to_fp16, x = var_2276_cast_fp16)[name = string("linear_80_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2291_to_fp16 = const()[name = string("op_2291_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406658816)))];
+            tensor<fp16, [1, ?, 1024]> linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2291_to_fp16, x = var_2276_cast_fp16)[name = string("linear_81_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2295_to_fp16 = const()[name = string("op_2295_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408756032)))];
+            tensor<fp16, [1024]> var_2296_to_fp16 = const()[name = string("op_2296_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410853248)))];
+            tensor<fp16, [1, ?, 1024]> linear_82_cast_fp16 = linear(bias = var_2296_to_fp16, weight = var_2295_to_fp16, x = var_2276_cast_fp16)[name = string("linear_82_cast_fp16")];
+            tensor<int32, [3]> var_2298_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2298_shape_cast_fp16")];
+            int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)];
+            int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)];
+            bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)];
+            string var_2298_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2298_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2298_shape_cast_fp16_to_uint16 = cast(dtype = var_2298_shape_cast_fp16_to_uint16_dtype_0, x = var_2298_shape_cast_fp16)[name = string("cast_274")];
+            uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2298_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")];
+            string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_273")];
+            int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")];
+            tensor<int32, [1]> expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")];
+            tensor<int32, [1]> concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor<int32, [1]>([10])];
+            int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)];
+            bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")];
+            tensor<int32, [1]> concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)];
+            bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_66)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_68_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_68 = read_state(input = k_cache1)[name = string("coreml_update_state_68")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_67)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_69_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_69 = read_state(input = v_cache1)[name = string("coreml_update_state_69")];
+            int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)];
+            int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(1024)];
+            int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)];
+            bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")];
+            tensor<int32, [3]> var_2314_begin_0 = const()[name = string("op_2314_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2314_end_mask_0 = const()[name = string("op_2314_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = concat_230, end_mask = var_2314_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2314_cast_fp16")];
+            tensor<int32, [3]> var_2317_begin_0 = const()[name = string("op_2317_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2317_end_mask_0 = const()[name = string("op_2317_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2317_cast_fp16 = slice_by_index(begin = var_2317_begin_0, end = concat_230, end_mask = var_2317_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2317_cast_fp16")];
+            tensor<int32, [4]> concat_232x = const()[name = string("concat_232x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2327_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2327_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_83_cast_fp16 = mul(x = var_2327_cast_fp16, y = const_160_to_fp16)[name = string("q_83_cast_fp16")];
+            tensor<int32, [4]> concat_233x = const()[name = string("concat_233x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2334_cast_fp16 = reshape(shape = concat_233x, x = var_2314_cast_fp16)[name = string("op_2334_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_105_cast_fp16 = mul(x = var_2334_cast_fp16, y = const_161_to_fp16)[name = string("k_105_cast_fp16")];
+            tensor<int32, [4]> concat_234x = const()[name = string("concat_234x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2341_cast_fp16 = reshape(shape = concat_234x, x = var_2317_cast_fp16)[name = string("op_2341_cast_fp16")];
+            tensor<int32, [4]> var_2342 = const()[name = string("op_2342"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)];
+            bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_233_perm_0 = const()[name = string("transpose_233_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_234_perm_0 = const()[name = string("transpose_234_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_234 = transpose(perm = transpose_234_perm_0, x = k_105_cast_fp16)[name = string("transpose_398")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_233 = transpose(perm = transpose_233_perm_0, x = q_83_cast_fp16)[name = string("transpose_399")];
+            tensor<fp16, [1, 16, ?, ?]> qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_233, y = transpose_234)[name = string("qk_61_cast_fp16")];
+            int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)];
+            int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)];
+            bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")];
+            tensor<int32, [2]> var_2345_begin_0 = const()[name = string("op_2345_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2345_end_mask_0 = const()[name = string("op_2345_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2345_cast_fp16 = slice_by_index(begin = var_2345_begin_0, end = concat_235, end_mask = var_2345_end_mask_0, x = mask_to_fp16)[name = string("op_2345_cast_fp16")];
+            int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)];
+            int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)];
+            bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")];
+            tensor<int32, [2]> var_2346_begin_0 = const()[name = string("op_2346_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2346_end_mask_0 = const()[name = string("op_2346_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = concat_236, end_mask = var_2346_end_mask_0, x = var_2345_cast_fp16)[name = string("op_2346_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2346_cast_fp16)[name = string("qk_63_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_2349_cast_fp16 = softmax(axis = var_2258, x = qk_63_cast_fp16)[name = string("op_2349_cast_fp16")];
+            bool var_2351_transpose_x_0 = const()[name = string("op_2351_transpose_x_0"), val = bool(false)];
+            bool var_2351_transpose_y_0 = const()[name = string("op_2351_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_105_cast_fp16 = transpose(perm = var_2342, x = var_2341_cast_fp16)[name = string("transpose_400")];
+            tensor<fp16, [1, 16, ?, 64]> var_2351_cast_fp16 = matmul(transpose_x = var_2351_transpose_x_0, transpose_y = var_2351_transpose_y_0, x = var_2349_cast_fp16, y = v_105_cast_fp16)[name = string("op_2351_cast_fp16")];
+            tensor<int32, [4]> var_2352 = const()[name = string("op_2352"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_237x = const()[name = string("concat_237x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_2353_cast_fp16 = transpose(perm = var_2352, x = var_2351_cast_fp16)[name = string("transpose_397")];
+            tensor<fp16, [1, ?, 1024]> x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2353_cast_fp16)[name = string("x_187_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2357_to_fp16 = const()[name = string("op_2357_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410855360)))];
+            tensor<fp16, [1024]> var_2358_to_fp16 = const()[name = string("op_2358_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412952576)))];
+            tensor<fp16, [1, ?, 1024]> linear_83_cast_fp16 = linear(bias = var_2358_to_fp16, weight = var_2357_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")];
+            tensor<int32, [1]> var_2365_axes_0 = const()[name = string("op_2365_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412954688)))];
+            tensor<fp16, [1024]> blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412956800)))];
+            tensor<fp16, [1, ?, 1024]> var_2365_cast_fp16 = layer_norm(axes = var_2365_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2264_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2365_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2374_to_fp16 = const()[name = string("op_2374_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412958912)))];
+            tensor<fp16, [1024]> var_2375_to_fp16 = const()[name = string("op_2375_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415056128)))];
+            tensor<fp16, [1, ?, 1024]> linear_84_cast_fp16 = linear(bias = var_2375_to_fp16, weight = var_2374_to_fp16, x = var_2365_cast_fp16)[name = string("linear_84_cast_fp16")];
+            tensor<int32, [3]> concat_238 = const()[name = string("concat_238"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_239 = const()[name = string("concat_239"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_240 = const()[name = string("concat_240"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_241 = const()[name = string("concat_241"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_242x = const()[name = string("concat_242x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2395_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2395_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_87_cast_fp16 = mul(x = var_2395_cast_fp16, y = const_162_to_fp16)[name = string("q_87_cast_fp16")];
+            tensor<int32, [4]> var_2401 = const()[name = string("op_2401"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2402_cast_fp16 = reshape(shape = var_2401, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2402_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_109_cast_fp16 = mul(x = var_2402_cast_fp16, y = const_163_to_fp16)[name = string("k_109_cast_fp16")];
+            tensor<int32, [4]> var_2408 = const()[name = string("op_2408"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2409_cast_fp16 = reshape(shape = var_2408, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2409_cast_fp16")];
+            tensor<int32, [4]> var_2410 = const()[name = string("op_2410"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)];
+            bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_235_perm_0 = const()[name = string("transpose_235_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_236_perm_0 = const()[name = string("transpose_236_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_236 = transpose(perm = transpose_236_perm_0, x = k_109_cast_fp16)[name = string("transpose_394")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_235 = transpose(perm = transpose_235_perm_0, x = q_87_cast_fp16)[name = string("transpose_395")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_235, y = transpose_236)[name = string("qk_65_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_2414_cast_fp16 = softmax(axis = var_2258, x = qk_65_cast_fp16)[name = string("op_2414_cast_fp16")];
+            bool var_2416_transpose_x_0 = const()[name = string("op_2416_transpose_x_0"), val = bool(false)];
+            bool var_2416_transpose_y_0 = const()[name = string("op_2416_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_109_cast_fp16 = transpose(perm = var_2410, x = var_2409_cast_fp16)[name = string("transpose_396")];
+            tensor<fp16, [1, 16, ?, 64]> var_2416_cast_fp16 = matmul(transpose_x = var_2416_transpose_x_0, transpose_y = var_2416_transpose_y_0, x = var_2414_cast_fp16, y = v_109_cast_fp16)[name = string("op_2416_cast_fp16")];
+            tensor<int32, [4]> var_2417 = const()[name = string("op_2417"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_243x = const()[name = string("concat_243x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_2418_cast_fp16 = transpose(perm = var_2417, x = var_2416_cast_fp16)[name = string("transpose_393")];
+            tensor<fp16, [1, ?, 1024]> x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2418_cast_fp16)[name = string("x_193_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2422_to_fp16 = const()[name = string("op_2422_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415058240)))];
+            tensor<fp16, [1024]> var_2423_to_fp16 = const()[name = string("op_2423_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417155456)))];
+            tensor<fp16, [1, ?, 1024]> linear_85_cast_fp16 = linear(bias = var_2423_to_fp16, weight = var_2422_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")];
+            tensor<int32, [1]> var_2430_axes_0 = const()[name = string("op_2430_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417157568)))];
+            tensor<fp16, [1024]> blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417159680)))];
+            tensor<fp16, [1, ?, 1024]> var_2430_cast_fp16 = layer_norm(axes = var_2430_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2264_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2430_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2439_to_fp16 = const()[name = string("op_2439_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417161792)))];
+            tensor<fp16, [4096]> var_2440_to_fp16 = const()[name = string("op_2440_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425550464)))];
+            tensor<fp16, [1, ?, 4096]> linear_86_cast_fp16 = linear(bias = var_2440_to_fp16, weight = var_2439_to_fp16, x = var_2430_cast_fp16)[name = string("linear_86_cast_fp16")];
+            string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2445_to_fp16 = const()[name = string("op_2445_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425558720)))];
+            tensor<fp16, [1024]> var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433947392)))];
+            tensor<fp16, [1, ?, 1024]> linear_87_cast_fp16 = linear(bias = var_2446_to_fp16, weight = var_2445_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")];
+            tensor<int32, [4]> k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor<int32, [4]>([12, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_68)[name = string("k_cache_45_cast_fp16")];
+            tensor<int32, [4]> v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor<int32, [4]>([12, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_69)[name = string("v_cache_45_cast_fp16")];
+            tensor<int32, [4]> k_cache_47_begin_0 = const()[name = string("k_cache_47_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_47_end_0 = const()[name = string("k_cache_47_end_0"), val = tensor<int32, [4]>([12, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_47_end_mask_0 = const()[name = string("k_cache_47_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_47_squeeze_mask_0 = const()[name = string("k_cache_47_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_47_cast_fp16 = slice_by_index(begin = k_cache_47_begin_0, end = k_cache_47_end_0, end_mask = k_cache_47_end_mask_0, squeeze_mask = k_cache_47_squeeze_mask_0, x = read_state_2)[name = string("k_cache_47_cast_fp16")];
+            tensor<int32, [4]> v_cache_47_begin_0 = const()[name = string("v_cache_47_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_47_end_0 = const()[name = string("v_cache_47_end_0"), val = tensor<int32, [4]>([12, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_47_end_mask_0 = const()[name = string("v_cache_47_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_47_squeeze_mask_0 = const()[name = string("v_cache_47_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_47_cast_fp16 = slice_by_index(begin = v_cache_47_begin_0, end = v_cache_47_end_0, end_mask = v_cache_47_end_mask_0, squeeze_mask = v_cache_47_squeeze_mask_0, x = read_state_3)[name = string("v_cache_47_cast_fp16")];
+            int32 var_2469 = const()[name = string("op_2469"), val = int32(-1)];
+            tensor<int32, [1]> var_2487_axes_0 = const()[name = string("op_2487_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433949504)))];
+            tensor<fp16, [1024]> blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433951616)))];
+            fp16 var_2475_to_fp16 = const()[name = string("op_2475_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_2487_cast_fp16 = layer_norm(axes = var_2487_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2475_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2487_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2498_to_fp16 = const()[name = string("op_2498_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433953728)))];
+            tensor<fp16, [1024]> var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436050944)))];
+            tensor<fp16, [1, ?, 1024]> linear_88_cast_fp16 = linear(bias = var_2499_to_fp16, weight = var_2498_to_fp16, x = var_2487_cast_fp16)[name = string("linear_88_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2502_to_fp16 = const()[name = string("op_2502_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436053056)))];
+            tensor<fp16, [1, ?, 1024]> linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2502_to_fp16, x = var_2487_cast_fp16)[name = string("linear_89_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2506_to_fp16 = const()[name = string("op_2506_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438150272)))];
+            tensor<fp16, [1024]> var_2507_to_fp16 = const()[name = string("op_2507_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440247488)))];
+            tensor<fp16, [1, ?, 1024]> linear_90_cast_fp16 = linear(bias = var_2507_to_fp16, weight = var_2506_to_fp16, x = var_2487_cast_fp16)[name = string("linear_90_cast_fp16")];
+            tensor<int32, [3]> var_2509_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2509_shape_cast_fp16")];
+            int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)];
+            int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)];
+            bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)];
+            string var_2509_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2509_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2509_shape_cast_fp16_to_uint16 = cast(dtype = var_2509_shape_cast_fp16_to_uint16_dtype_0, x = var_2509_shape_cast_fp16)[name = string("cast_272")];
+            uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2509_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")];
+            string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_271")];
+            int32 end_step_25 = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step_25")];
+            tensor<int32, [1]> expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step_25)[name = string("expand_dims_179")];
+            tensor<int32, [1]> concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor<int32, [1]>([11])];
+            int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)];
+            bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")];
+            tensor<int32, [1]> concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)];
+            bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_68)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_70_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_70 = read_state(input = k_cache1)[name = string("coreml_update_state_70")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_69)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_71_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_71 = read_state(input = v_cache1)[name = string("coreml_update_state_71")];
+            int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)];
+            int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(1024)];
+            int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)];
+            bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step_25, concat_252_values2_0))[name = string("concat_252")];
+            tensor<int32, [3]> var_2525_begin_0 = const()[name = string("op_2525_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2525_end_mask_0 = const()[name = string("op_2525_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2525_cast_fp16 = slice_by_index(begin = var_2525_begin_0, end = concat_252, end_mask = var_2525_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2525_cast_fp16")];
+            tensor<int32, [3]> var_2528_begin_0 = const()[name = string("op_2528_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2528_end_mask_0 = const()[name = string("op_2528_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2528_cast_fp16 = slice_by_index(begin = var_2528_begin_0, end = concat_252, end_mask = var_2528_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2528_cast_fp16")];
+            tensor<int32, [4]> concat_254x = const()[name = string("concat_254x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2538_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2538_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_91_cast_fp16 = mul(x = var_2538_cast_fp16, y = const_164_to_fp16)[name = string("q_91_cast_fp16")];
+            tensor<int32, [4]> concat_255x = const()[name = string("concat_255x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2545_cast_fp16 = reshape(shape = concat_255x, x = var_2525_cast_fp16)[name = string("op_2545_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_115_cast_fp16 = mul(x = var_2545_cast_fp16, y = const_165_to_fp16)[name = string("k_115_cast_fp16")];
+            tensor<int32, [4]> concat_256x = const()[name = string("concat_256x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2552_cast_fp16 = reshape(shape = concat_256x, x = var_2528_cast_fp16)[name = string("op_2552_cast_fp16")];
+            tensor<int32, [4]> var_2553 = const()[name = string("op_2553"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)];
+            bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_237_perm_0 = const()[name = string("transpose_237_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_238_perm_0 = const()[name = string("transpose_238_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_238 = transpose(perm = transpose_238_perm_0, x = k_115_cast_fp16)[name = string("transpose_390")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_237 = transpose(perm = transpose_237_perm_0, x = q_91_cast_fp16)[name = string("transpose_391")];
+            tensor<fp16, [1, 16, ?, ?]> qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_237, y = transpose_238)[name = string("qk_67_cast_fp16")];
+            int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)];
+            int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)];
+            bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")];
+            tensor<int32, [2]> var_2556_begin_0 = const()[name = string("op_2556_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2556_end_mask_0 = const()[name = string("op_2556_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2556_cast_fp16 = slice_by_index(begin = var_2556_begin_0, end = concat_257, end_mask = var_2556_end_mask_0, x = mask_to_fp16)[name = string("op_2556_cast_fp16")];
+            int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)];
+            int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)];
+            bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")];
+            tensor<int32, [2]> var_2557_begin_0 = const()[name = string("op_2557_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2557_end_mask_0 = const()[name = string("op_2557_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = concat_258, end_mask = var_2557_end_mask_0, x = var_2556_cast_fp16)[name = string("op_2557_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2557_cast_fp16)[name = string("qk_69_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_2560_cast_fp16 = softmax(axis = var_2469, x = qk_69_cast_fp16)[name = string("op_2560_cast_fp16")];
+            bool var_2562_transpose_x_0 = const()[name = string("op_2562_transpose_x_0"), val = bool(false)];
+            bool var_2562_transpose_y_0 = const()[name = string("op_2562_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_115_cast_fp16 = transpose(perm = var_2553, x = var_2552_cast_fp16)[name = string("transpose_392")];
+            tensor<fp16, [1, 16, ?, 64]> var_2562_cast_fp16 = matmul(transpose_x = var_2562_transpose_x_0, transpose_y = var_2562_transpose_y_0, x = var_2560_cast_fp16, y = v_115_cast_fp16)[name = string("op_2562_cast_fp16")];
+            tensor<int32, [4]> var_2563 = const()[name = string("op_2563"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_259x = const()[name = string("concat_259x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_2564_cast_fp16 = transpose(perm = var_2563, x = var_2562_cast_fp16)[name = string("transpose_389")];
+            tensor<fp16, [1, ?, 1024]> x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2564_cast_fp16)[name = string("x_205_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2568_to_fp16 = const()[name = string("op_2568_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440249600)))];
+            tensor<fp16, [1024]> var_2569_to_fp16 = const()[name = string("op_2569_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442346816)))];
+            tensor<fp16, [1, ?, 1024]> linear_91_cast_fp16 = linear(bias = var_2569_to_fp16, weight = var_2568_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")];
+            tensor<int32, [1]> var_2576_axes_0 = const()[name = string("op_2576_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442348928)))];
+            tensor<fp16, [1024]> blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442351040)))];
+            tensor<fp16, [1, ?, 1024]> var_2576_cast_fp16 = layer_norm(axes = var_2576_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2475_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2576_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2585_to_fp16 = const()[name = string("op_2585_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442353152)))];
+            tensor<fp16, [1024]> var_2586_to_fp16 = const()[name = string("op_2586_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444450368)))];
+            tensor<fp16, [1, ?, 1024]> linear_92_cast_fp16 = linear(bias = var_2586_to_fp16, weight = var_2585_to_fp16, x = var_2576_cast_fp16)[name = string("linear_92_cast_fp16")];
+            tensor<int32, [3]> concat_260 = const()[name = string("concat_260"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_261 = const()[name = string("concat_261"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_262 = const()[name = string("concat_262"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_263 = const()[name = string("concat_263"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_264x = const()[name = string("concat_264x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2606_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2606_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_95_cast_fp16 = mul(x = var_2606_cast_fp16, y = const_166_to_fp16)[name = string("q_95_cast_fp16")];
+            tensor<int32, [4]> var_2612 = const()[name = string("op_2612"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2613_cast_fp16 = reshape(shape = var_2612, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2613_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_167_to_fp16 = const()[name = string("const_167_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_119_cast_fp16 = mul(x = var_2613_cast_fp16, y = const_167_to_fp16)[name = string("k_119_cast_fp16")];
+            tensor<int32, [4]> var_2619 = const()[name = string("op_2619"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2620_cast_fp16 = reshape(shape = var_2619, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2620_cast_fp16")];
+            tensor<int32, [4]> var_2621 = const()[name = string("op_2621"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_71_transpose_x_0 = const()[name = string("qk_71_transpose_x_0"), val = bool(false)];
+            bool qk_71_transpose_y_0 = const()[name = string("qk_71_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_239_perm_0 = const()[name = string("transpose_239_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_240_perm_0 = const()[name = string("transpose_240_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_240 = transpose(perm = transpose_240_perm_0, x = k_119_cast_fp16)[name = string("transpose_386")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_239 = transpose(perm = transpose_239_perm_0, x = q_95_cast_fp16)[name = string("transpose_387")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_71_cast_fp16 = matmul(transpose_x = qk_71_transpose_x_0, transpose_y = qk_71_transpose_y_0, x = transpose_239, y = transpose_240)[name = string("qk_71_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_2625_cast_fp16 = softmax(axis = var_2469, x = qk_71_cast_fp16)[name = string("op_2625_cast_fp16")];
+            bool var_2627_transpose_x_0 = const()[name = string("op_2627_transpose_x_0"), val = bool(false)];
+            bool var_2627_transpose_y_0 = const()[name = string("op_2627_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_119_cast_fp16 = transpose(perm = var_2621, x = var_2620_cast_fp16)[name = string("transpose_388")];
+            tensor<fp16, [1, 16, ?, 64]> var_2627_cast_fp16 = matmul(transpose_x = var_2627_transpose_x_0, transpose_y = var_2627_transpose_y_0, x = var_2625_cast_fp16, y = v_119_cast_fp16)[name = string("op_2627_cast_fp16")];
+            tensor<int32, [4]> var_2628 = const()[name = string("op_2628"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_265x = const()[name = string("concat_265x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_2629_cast_fp16 = transpose(perm = var_2628, x = var_2627_cast_fp16)[name = string("transpose_385")];
+            tensor<fp16, [1, ?, 1024]> x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2629_cast_fp16)[name = string("x_211_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2633_to_fp16 = const()[name = string("op_2633_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444452480)))];
+            tensor<fp16, [1024]> var_2634_to_fp16 = const()[name = string("op_2634_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446549696)))];
+            tensor<fp16, [1, ?, 1024]> linear_93_cast_fp16 = linear(bias = var_2634_to_fp16, weight = var_2633_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")];
+            tensor<int32, [1]> var_2641_axes_0 = const()[name = string("op_2641_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446551808)))];
+            tensor<fp16, [1024]> blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446553920)))];
+            tensor<fp16, [1, ?, 1024]> var_2641_cast_fp16 = layer_norm(axes = var_2641_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2475_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2641_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2650_to_fp16 = const()[name = string("op_2650_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446556032)))];
+            tensor<fp16, [4096]> var_2651_to_fp16 = const()[name = string("op_2651_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454944704)))];
+            tensor<fp16, [1, ?, 4096]> linear_94_cast_fp16 = linear(bias = var_2651_to_fp16, weight = var_2650_to_fp16, x = var_2641_cast_fp16)[name = string("linear_94_cast_fp16")];
+            string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2656_to_fp16 = const()[name = string("op_2656_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454952960)))];
+            tensor<fp16, [1024]> var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463341632)))];
+            tensor<fp16, [1, ?, 1024]> linear_95_cast_fp16 = linear(bias = var_2657_to_fp16, weight = var_2656_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")];
+            tensor<int32, [4]> k_cache_49_begin_0 = const()[name = string("k_cache_49_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_49_end_0 = const()[name = string("k_cache_49_end_0"), val = tensor<int32, [4]>([13, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_49_end_mask_0 = const()[name = string("k_cache_49_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_49_squeeze_mask_0 = const()[name = string("k_cache_49_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_49_cast_fp16 = slice_by_index(begin = k_cache_49_begin_0, end = k_cache_49_end_0, end_mask = k_cache_49_end_mask_0, squeeze_mask = k_cache_49_squeeze_mask_0, x = coreml_update_state_70)[name = string("k_cache_49_cast_fp16")];
+            tensor<int32, [4]> v_cache_49_begin_0 = const()[name = string("v_cache_49_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_49_end_0 = const()[name = string("v_cache_49_end_0"), val = tensor<int32, [4]>([13, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_49_end_mask_0 = const()[name = string("v_cache_49_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_49_squeeze_mask_0 = const()[name = string("v_cache_49_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_49_cast_fp16 = slice_by_index(begin = v_cache_49_begin_0, end = v_cache_49_end_0, end_mask = v_cache_49_end_mask_0, squeeze_mask = v_cache_49_squeeze_mask_0, x = coreml_update_state_71)[name = string("v_cache_49_cast_fp16")];
+            tensor<int32, [4]> k_cache_51_begin_0 = const()[name = string("k_cache_51_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_51_end_0 = const()[name = string("k_cache_51_end_0"), val = tensor<int32, [4]>([13, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_51_end_mask_0 = const()[name = string("k_cache_51_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_51_squeeze_mask_0 = const()[name = string("k_cache_51_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_51_cast_fp16 = slice_by_index(begin = k_cache_51_begin_0, end = k_cache_51_end_0, end_mask = k_cache_51_end_mask_0, squeeze_mask = k_cache_51_squeeze_mask_0, x = read_state_2)[name = string("k_cache_51_cast_fp16")];
+            tensor<int32, [4]> v_cache_51_begin_0 = const()[name = string("v_cache_51_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_51_end_0 = const()[name = string("v_cache_51_end_0"), val = tensor<int32, [4]>([13, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_51_end_mask_0 = const()[name = string("v_cache_51_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_51_squeeze_mask_0 = const()[name = string("v_cache_51_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_51_cast_fp16 = slice_by_index(begin = v_cache_51_begin_0, end = v_cache_51_end_0, end_mask = v_cache_51_end_mask_0, squeeze_mask = v_cache_51_squeeze_mask_0, x = read_state_3)[name = string("v_cache_51_cast_fp16")];
+            int32 var_2680 = const()[name = string("op_2680"), val = int32(-1)];
+            tensor<int32, [1]> var_2698_axes_0 = const()[name = string("op_2698_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463343744)))];
+            tensor<fp16, [1024]> blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463345856)))];
+            fp16 var_2686_to_fp16 = const()[name = string("op_2686_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_2698_cast_fp16 = layer_norm(axes = var_2698_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_2686_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2698_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2709_to_fp16 = const()[name = string("op_2709_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463347968)))];
+            tensor<fp16, [1024]> var_2710_to_fp16 = const()[name = string("op_2710_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465445184)))];
+            tensor<fp16, [1, ?, 1024]> linear_96_cast_fp16 = linear(bias = var_2710_to_fp16, weight = var_2709_to_fp16, x = var_2698_cast_fp16)[name = string("linear_96_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2713_to_fp16 = const()[name = string("op_2713_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465447296)))];
+            tensor<fp16, [1, ?, 1024]> linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2713_to_fp16, x = var_2698_cast_fp16)[name = string("linear_97_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467544512)))];
+            tensor<fp16, [1024]> var_2718_to_fp16 = const()[name = string("op_2718_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469641728)))];
+            tensor<fp16, [1, ?, 1024]> linear_98_cast_fp16 = linear(bias = var_2718_to_fp16, weight = var_2717_to_fp16, x = var_2698_cast_fp16)[name = string("linear_98_cast_fp16")];
+            tensor<int32, [3]> var_2720_shape_cast_fp16 = shape(x = linear_96_cast_fp16)[name = string("op_2720_shape_cast_fp16")];
+            int32 gather_146_axis_0 = const()[name = string("gather_146_axis_0"), val = int32(0)];
+            int32 gather_146_batch_dims_0 = const()[name = string("gather_146_batch_dims_0"), val = int32(0)];
+            bool gather_146_validate_indices_0 = const()[name = string("gather_146_validate_indices_0"), val = bool(false)];
+            string var_2720_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2720_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_146_to_uint16 = const()[name = string("select_146_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2720_shape_cast_fp16_to_uint16 = cast(dtype = var_2720_shape_cast_fp16_to_uint16_dtype_0, x = var_2720_shape_cast_fp16)[name = string("cast_270")];
+            uint16 gather_146_cast_uint16 = gather(axis = gather_146_axis_0, batch_dims = gather_146_batch_dims_0, indices = select_146_to_uint16, validate_indices = gather_146_validate_indices_0, x = var_2720_shape_cast_fp16_to_uint16)[name = string("gather_146_cast_uint16")];
+            string gather_146_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_146_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_146_cast_uint16_to_int32 = cast(dtype = gather_146_cast_uint16_to_int32_dtype_0, x = gather_146_cast_uint16)[name = string("cast_269")];
+            int32 end_step_27 = add(x = offset, y = gather_146_cast_uint16_to_int32)[name = string("end_step_27")];
+            tensor<int32, [1]> expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = end_step_27)[name = string("expand_dims_195")];
+            tensor<int32, [1]> concat_268_values0_0 = const()[name = string("concat_268_values0_0"), val = tensor<int32, [1]>([12])];
+            int32 concat_268_axis_0 = const()[name = string("concat_268_axis_0"), val = int32(0)];
+            bool concat_268_interleave_0 = const()[name = string("concat_268_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_268 = concat(axis = concat_268_axis_0, interleave = concat_268_interleave_0, values = (concat_268_values0_0, expand_dims_192, expand_dims_1, expand_dims_194))[name = string("concat_268")];
+            tensor<int32, [1]> concat_269_values0_0 = const()[name = string("concat_269_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_269_values1_0 = const()[name = string("concat_269_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_269_values3_0 = const()[name = string("concat_269_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_269_axis_0 = const()[name = string("concat_269_axis_0"), val = int32(0)];
+            bool concat_269_interleave_0 = const()[name = string("concat_269_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_269 = concat(axis = concat_269_axis_0, interleave = concat_269_interleave_0, values = (concat_269_values0_0, concat_269_values1_0, expand_dims_195, concat_269_values3_0))[name = string("concat_269")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = k_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = k_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_13_stride_0, update = linear_97_cast_fp16, x = coreml_update_state_70)[name = string("k_cache1_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_13_cast_fp16, input = k_cache1)[name = string("coreml_update_state_72_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_72 = read_state(input = k_cache1)[name = string("coreml_update_state_72")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = v_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = v_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_13_stride_0, update = linear_98_cast_fp16, x = coreml_update_state_71)[name = string("v_cache1_internal_tensor_assign_13_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_13_cast_fp16, input = v_cache1)[name = string("coreml_update_state_73_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_73 = read_state(input = v_cache1)[name = string("coreml_update_state_73")];
+            int32 concat_274_values0_0 = const()[name = string("concat_274_values0_0"), val = int32(1)];
+            int32 concat_274_values2_0 = const()[name = string("concat_274_values2_0"), val = int32(1024)];
+            int32 concat_274_axis_0 = const()[name = string("concat_274_axis_0"), val = int32(0)];
+            bool concat_274_interleave_0 = const()[name = string("concat_274_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_274 = concat(axis = concat_274_axis_0, interleave = concat_274_interleave_0, values = (concat_274_values0_0, end_step_27, concat_274_values2_0))[name = string("concat_274")];
+            tensor<int32, [3]> var_2736_begin_0 = const()[name = string("op_2736_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2736_end_mask_0 = const()[name = string("op_2736_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2736_cast_fp16 = slice_by_index(begin = var_2736_begin_0, end = concat_274, end_mask = var_2736_end_mask_0, x = k_cache_49_cast_fp16)[name = string("op_2736_cast_fp16")];
+            tensor<int32, [3]> var_2739_begin_0 = const()[name = string("op_2739_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2739_end_mask_0 = const()[name = string("op_2739_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2739_cast_fp16 = slice_by_index(begin = var_2739_begin_0, end = concat_274, end_mask = var_2739_end_mask_0, x = v_cache_49_cast_fp16)[name = string("op_2739_cast_fp16")];
+            tensor<int32, [4]> concat_276x = const()[name = string("concat_276x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2749_cast_fp16 = reshape(shape = concat_276x, x = linear_96_cast_fp16)[name = string("op_2749_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_99_cast_fp16 = mul(x = var_2749_cast_fp16, y = const_168_to_fp16)[name = string("q_99_cast_fp16")];
+            tensor<int32, [4]> concat_277x = const()[name = string("concat_277x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2756_cast_fp16 = reshape(shape = concat_277x, x = var_2736_cast_fp16)[name = string("op_2756_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_125_cast_fp16 = mul(x = var_2756_cast_fp16, y = const_169_to_fp16)[name = string("k_125_cast_fp16")];
+            tensor<int32, [4]> concat_278x = const()[name = string("concat_278x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2763_cast_fp16 = reshape(shape = concat_278x, x = var_2739_cast_fp16)[name = string("op_2763_cast_fp16")];
+            tensor<int32, [4]> var_2764 = const()[name = string("op_2764"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_73_transpose_x_0 = const()[name = string("qk_73_transpose_x_0"), val = bool(false)];
+            bool qk_73_transpose_y_0 = const()[name = string("qk_73_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_241_perm_0 = const()[name = string("transpose_241_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_242_perm_0 = const()[name = string("transpose_242_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_242 = transpose(perm = transpose_242_perm_0, x = k_125_cast_fp16)[name = string("transpose_382")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_241 = transpose(perm = transpose_241_perm_0, x = q_99_cast_fp16)[name = string("transpose_383")];
+            tensor<fp16, [1, 16, ?, ?]> qk_73_cast_fp16 = matmul(transpose_x = qk_73_transpose_x_0, transpose_y = qk_73_transpose_y_0, x = transpose_241, y = transpose_242)[name = string("qk_73_cast_fp16")];
+            int32 concat_279_values1_0 = const()[name = string("concat_279_values1_0"), val = int32(448)];
+            int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)];
+            bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_146_cast_uint16_to_int32, concat_279_values1_0))[name = string("concat_279")];
+            tensor<int32, [2]> var_2767_begin_0 = const()[name = string("op_2767_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2767_end_mask_0 = const()[name = string("op_2767_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2767_cast_fp16 = slice_by_index(begin = var_2767_begin_0, end = concat_279, end_mask = var_2767_end_mask_0, x = mask_to_fp16)[name = string("op_2767_cast_fp16")];
+            int32 concat_280_values0_0 = const()[name = string("concat_280_values0_0"), val = int32(0)];
+            int32 concat_280_axis_0 = const()[name = string("concat_280_axis_0"), val = int32(0)];
+            bool concat_280_interleave_0 = const()[name = string("concat_280_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_280 = concat(axis = concat_280_axis_0, interleave = concat_280_interleave_0, values = (concat_280_values0_0, gather_146_cast_uint16_to_int32))[name = string("concat_280")];
+            tensor<int32, [2]> var_2768_begin_0 = const()[name = string("op_2768_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2768_end_mask_0 = const()[name = string("op_2768_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2768_cast_fp16 = slice_by_index(begin = var_2768_begin_0, end = concat_280, end_mask = var_2768_end_mask_0, x = var_2767_cast_fp16)[name = string("op_2768_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_75_cast_fp16 = add(x = qk_73_cast_fp16, y = var_2768_cast_fp16)[name = string("qk_75_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_2771_cast_fp16 = softmax(axis = var_2680, x = qk_75_cast_fp16)[name = string("op_2771_cast_fp16")];
+            bool var_2773_transpose_x_0 = const()[name = string("op_2773_transpose_x_0"), val = bool(false)];
+            bool var_2773_transpose_y_0 = const()[name = string("op_2773_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_125_cast_fp16 = transpose(perm = var_2764, x = var_2763_cast_fp16)[name = string("transpose_384")];
+            tensor<fp16, [1, 16, ?, 64]> var_2773_cast_fp16 = matmul(transpose_x = var_2773_transpose_x_0, transpose_y = var_2773_transpose_y_0, x = var_2771_cast_fp16, y = v_125_cast_fp16)[name = string("op_2773_cast_fp16")];
+            tensor<int32, [4]> var_2774 = const()[name = string("op_2774"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_281x = const()[name = string("concat_281x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_2775_cast_fp16 = transpose(perm = var_2774, x = var_2773_cast_fp16)[name = string("transpose_381")];
+            tensor<fp16, [1, ?, 1024]> x_223_cast_fp16 = reshape(shape = concat_281x, x = var_2775_cast_fp16)[name = string("x_223_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2779_to_fp16 = const()[name = string("op_2779_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469643840)))];
+            tensor<fp16, [1024]> var_2780_to_fp16 = const()[name = string("op_2780_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471741056)))];
+            tensor<fp16, [1, ?, 1024]> linear_99_cast_fp16 = linear(bias = var_2780_to_fp16, weight = var_2779_to_fp16, x = x_223_cast_fp16)[name = string("linear_99_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_225_cast_fp16 = add(x = x_219_cast_fp16, y = linear_99_cast_fp16)[name = string("x_225_cast_fp16")];
+            tensor<int32, [1]> var_2787_axes_0 = const()[name = string("op_2787_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_12_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471743168)))];
+            tensor<fp16, [1024]> blocks_12_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471745280)))];
+            tensor<fp16, [1, ?, 1024]> var_2787_cast_fp16 = layer_norm(axes = var_2787_axes_0, beta = blocks_12_cross_attn_ln_bias_to_fp16, epsilon = var_2686_to_fp16, gamma = blocks_12_cross_attn_ln_weight_to_fp16, x = x_225_cast_fp16)[name = string("op_2787_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2796_to_fp16 = const()[name = string("op_2796_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471747392)))];
+            tensor<fp16, [1024]> var_2797_to_fp16 = const()[name = string("op_2797_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473844608)))];
+            tensor<fp16, [1, ?, 1024]> linear_100_cast_fp16 = linear(bias = var_2797_to_fp16, weight = var_2796_to_fp16, x = var_2787_cast_fp16)[name = string("linear_100_cast_fp16")];
+            tensor<int32, [3]> concat_282 = const()[name = string("concat_282"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_283 = const()[name = string("concat_283"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_127_internal_tensor_assign_1_stride_0 = const()[name = string("k_127_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_282, begin_mask = k_127_internal_tensor_assign_1_begin_mask_0, end = concat_283, end_mask = k_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_127_internal_tensor_assign_1_squeeze_mask_0, stride = k_127_internal_tensor_assign_1_stride_0, update = k_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("k_127_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_284 = const()[name = string("concat_284"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_285 = const()[name = string("concat_285"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_127_internal_tensor_assign_1_stride_0 = const()[name = string("v_127_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_284, begin_mask = v_127_internal_tensor_assign_1_begin_mask_0, end = concat_285, end_mask = v_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_127_internal_tensor_assign_1_squeeze_mask_0, stride = v_127_internal_tensor_assign_1_stride_0, update = v_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("v_127_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_286x = const()[name = string("concat_286x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2817_cast_fp16 = reshape(shape = concat_286x, x = linear_100_cast_fp16)[name = string("op_2817_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_103_cast_fp16 = mul(x = var_2817_cast_fp16, y = const_170_to_fp16)[name = string("q_103_cast_fp16")];
+            tensor<int32, [4]> var_2823 = const()[name = string("op_2823"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2824_cast_fp16 = reshape(shape = var_2823, x = k_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2824_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_129_cast_fp16 = mul(x = var_2824_cast_fp16, y = const_171_to_fp16)[name = string("k_129_cast_fp16")];
+            tensor<int32, [4]> var_2830 = const()[name = string("op_2830"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2831_cast_fp16 = reshape(shape = var_2830, x = v_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2831_cast_fp16")];
+            tensor<int32, [4]> var_2832 = const()[name = string("op_2832"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_77_transpose_x_0 = const()[name = string("qk_77_transpose_x_0"), val = bool(false)];
+            bool qk_77_transpose_y_0 = const()[name = string("qk_77_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_243_perm_0 = const()[name = string("transpose_243_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_244_perm_0 = const()[name = string("transpose_244_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_244 = transpose(perm = transpose_244_perm_0, x = k_129_cast_fp16)[name = string("transpose_378")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_243 = transpose(perm = transpose_243_perm_0, x = q_103_cast_fp16)[name = string("transpose_379")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_77_cast_fp16 = matmul(transpose_x = qk_77_transpose_x_0, transpose_y = qk_77_transpose_y_0, x = transpose_243, y = transpose_244)[name = string("qk_77_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_2836_cast_fp16 = softmax(axis = var_2680, x = qk_77_cast_fp16)[name = string("op_2836_cast_fp16")];
+            bool var_2838_transpose_x_0 = const()[name = string("op_2838_transpose_x_0"), val = bool(false)];
+            bool var_2838_transpose_y_0 = const()[name = string("op_2838_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_129_cast_fp16 = transpose(perm = var_2832, x = var_2831_cast_fp16)[name = string("transpose_380")];
+            tensor<fp16, [1, 16, ?, 64]> var_2838_cast_fp16 = matmul(transpose_x = var_2838_transpose_x_0, transpose_y = var_2838_transpose_y_0, x = var_2836_cast_fp16, y = v_129_cast_fp16)[name = string("op_2838_cast_fp16")];
+            tensor<int32, [4]> var_2839 = const()[name = string("op_2839"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_287x = const()[name = string("concat_287x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_2840_cast_fp16 = transpose(perm = var_2839, x = var_2838_cast_fp16)[name = string("transpose_377")];
+            tensor<fp16, [1, ?, 1024]> x_229_cast_fp16 = reshape(shape = concat_287x, x = var_2840_cast_fp16)[name = string("x_229_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2844_to_fp16 = const()[name = string("op_2844_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473846720)))];
+            tensor<fp16, [1024]> var_2845_to_fp16 = const()[name = string("op_2845_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475943936)))];
+            tensor<fp16, [1, ?, 1024]> linear_101_cast_fp16 = linear(bias = var_2845_to_fp16, weight = var_2844_to_fp16, x = x_229_cast_fp16)[name = string("linear_101_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_231_cast_fp16 = add(x = x_225_cast_fp16, y = linear_101_cast_fp16)[name = string("x_231_cast_fp16")];
+            tensor<int32, [1]> var_2852_axes_0 = const()[name = string("op_2852_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475946048)))];
+            tensor<fp16, [1024]> blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475948160)))];
+            tensor<fp16, [1, ?, 1024]> var_2852_cast_fp16 = layer_norm(axes = var_2852_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_2686_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_231_cast_fp16)[name = string("op_2852_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2861_to_fp16 = const()[name = string("op_2861_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475950272)))];
+            tensor<fp16, [4096]> var_2862_to_fp16 = const()[name = string("op_2862_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484338944)))];
+            tensor<fp16, [1, ?, 4096]> linear_102_cast_fp16 = linear(bias = var_2862_to_fp16, weight = var_2861_to_fp16, x = var_2852_cast_fp16)[name = string("linear_102_cast_fp16")];
+            string x_235_mode_0 = const()[name = string("x_235_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_235_cast_fp16 = gelu(mode = x_235_mode_0, x = linear_102_cast_fp16)[name = string("x_235_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2867_to_fp16 = const()[name = string("op_2867_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484347200)))];
+            tensor<fp16, [1024]> var_2868_to_fp16 = const()[name = string("op_2868_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492735872)))];
+            tensor<fp16, [1, ?, 1024]> linear_103_cast_fp16 = linear(bias = var_2868_to_fp16, weight = var_2867_to_fp16, x = x_235_cast_fp16)[name = string("linear_103_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_237_cast_fp16 = add(x = x_231_cast_fp16, y = linear_103_cast_fp16)[name = string("x_237_cast_fp16")];
+            tensor<int32, [4]> k_cache_53_begin_0 = const()[name = string("k_cache_53_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_53_end_0 = const()[name = string("k_cache_53_end_0"), val = tensor<int32, [4]>([14, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_53_end_mask_0 = const()[name = string("k_cache_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_53_squeeze_mask_0 = const()[name = string("k_cache_53_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_53_cast_fp16 = slice_by_index(begin = k_cache_53_begin_0, end = k_cache_53_end_0, end_mask = k_cache_53_end_mask_0, squeeze_mask = k_cache_53_squeeze_mask_0, x = coreml_update_state_72)[name = string("k_cache_53_cast_fp16")];
+            tensor<int32, [4]> v_cache_53_begin_0 = const()[name = string("v_cache_53_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_53_end_0 = const()[name = string("v_cache_53_end_0"), val = tensor<int32, [4]>([14, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_53_end_mask_0 = const()[name = string("v_cache_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_53_squeeze_mask_0 = const()[name = string("v_cache_53_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_53_cast_fp16 = slice_by_index(begin = v_cache_53_begin_0, end = v_cache_53_end_0, end_mask = v_cache_53_end_mask_0, squeeze_mask = v_cache_53_squeeze_mask_0, x = coreml_update_state_73)[name = string("v_cache_53_cast_fp16")];
+            tensor<int32, [4]> k_cache_55_begin_0 = const()[name = string("k_cache_55_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_55_end_0 = const()[name = string("k_cache_55_end_0"), val = tensor<int32, [4]>([14, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_55_end_mask_0 = const()[name = string("k_cache_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_55_squeeze_mask_0 = const()[name = string("k_cache_55_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_55_cast_fp16 = slice_by_index(begin = k_cache_55_begin_0, end = k_cache_55_end_0, end_mask = k_cache_55_end_mask_0, squeeze_mask = k_cache_55_squeeze_mask_0, x = read_state_2)[name = string("k_cache_55_cast_fp16")];
+            tensor<int32, [4]> v_cache_55_begin_0 = const()[name = string("v_cache_55_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_55_end_0 = const()[name = string("v_cache_55_end_0"), val = tensor<int32, [4]>([14, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_55_end_mask_0 = const()[name = string("v_cache_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_55_squeeze_mask_0 = const()[name = string("v_cache_55_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_55_cast_fp16 = slice_by_index(begin = v_cache_55_begin_0, end = v_cache_55_end_0, end_mask = v_cache_55_end_mask_0, squeeze_mask = v_cache_55_squeeze_mask_0, x = read_state_3)[name = string("v_cache_55_cast_fp16")];
+            int32 var_2891 = const()[name = string("op_2891"), val = int32(-1)];
+            tensor<int32, [1]> var_2909_axes_0 = const()[name = string("op_2909_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492737984)))];
+            tensor<fp16, [1024]> blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492740096)))];
+            fp16 var_2897_to_fp16 = const()[name = string("op_2897_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_2909_cast_fp16 = layer_norm(axes = var_2909_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_2897_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_237_cast_fp16)[name = string("op_2909_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2920_to_fp16 = const()[name = string("op_2920_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492742208)))];
+            tensor<fp16, [1024]> var_2921_to_fp16 = const()[name = string("op_2921_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494839424)))];
+            tensor<fp16, [1, ?, 1024]> linear_104_cast_fp16 = linear(bias = var_2921_to_fp16, weight = var_2920_to_fp16, x = var_2909_cast_fp16)[name = string("linear_104_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2924_to_fp16 = const()[name = string("op_2924_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494841536)))];
+            tensor<fp16, [1, ?, 1024]> linear_105_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2924_to_fp16, x = var_2909_cast_fp16)[name = string("linear_105_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2928_to_fp16 = const()[name = string("op_2928_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496938752)))];
+            tensor<fp16, [1024]> var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499035968)))];
+            tensor<fp16, [1, ?, 1024]> linear_106_cast_fp16 = linear(bias = var_2929_to_fp16, weight = var_2928_to_fp16, x = var_2909_cast_fp16)[name = string("linear_106_cast_fp16")];
+            tensor<int32, [3]> var_2931_shape_cast_fp16 = shape(x = linear_104_cast_fp16)[name = string("op_2931_shape_cast_fp16")];
+            int32 gather_158_axis_0 = const()[name = string("gather_158_axis_0"), val = int32(0)];
+            int32 gather_158_batch_dims_0 = const()[name = string("gather_158_batch_dims_0"), val = int32(0)];
+            bool gather_158_validate_indices_0 = const()[name = string("gather_158_validate_indices_0"), val = bool(false)];
+            string var_2931_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2931_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_158_to_uint16 = const()[name = string("select_158_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2931_shape_cast_fp16_to_uint16 = cast(dtype = var_2931_shape_cast_fp16_to_uint16_dtype_0, x = var_2931_shape_cast_fp16)[name = string("cast_268")];
+            uint16 gather_158_cast_uint16 = gather(axis = gather_158_axis_0, batch_dims = gather_158_batch_dims_0, indices = select_158_to_uint16, validate_indices = gather_158_validate_indices_0, x = var_2931_shape_cast_fp16_to_uint16)[name = string("gather_158_cast_uint16")];
+            string gather_158_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_158_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_158_cast_uint16_to_int32 = cast(dtype = gather_158_cast_uint16_to_int32_dtype_0, x = gather_158_cast_uint16)[name = string("cast_267")];
+            int32 end_step_29 = add(x = offset, y = gather_158_cast_uint16_to_int32)[name = string("end_step_29")];
+            tensor<int32, [1]> expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = end_step_29)[name = string("expand_dims_211")];
+            tensor<int32, [1]> concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor<int32, [1]>([13])];
+            int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)];
+            bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_208, expand_dims_1, expand_dims_210))[name = string("concat_290")];
+            tensor<int32, [1]> concat_291_values0_0 = const()[name = string("concat_291_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)];
+            bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (concat_291_values0_0, concat_291_values1_0, expand_dims_211, concat_291_values3_0))[name = string("concat_291")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = k_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = k_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_14_stride_0, update = linear_105_cast_fp16, x = coreml_update_state_72)[name = string("k_cache1_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_14_cast_fp16, input = k_cache1)[name = string("coreml_update_state_74_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_74 = read_state(input = k_cache1)[name = string("coreml_update_state_74")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = v_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = v_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_14_stride_0, update = linear_106_cast_fp16, x = coreml_update_state_73)[name = string("v_cache1_internal_tensor_assign_14_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_14_cast_fp16, input = v_cache1)[name = string("coreml_update_state_75_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_75 = read_state(input = v_cache1)[name = string("coreml_update_state_75")];
+            int32 concat_296_values0_0 = const()[name = string("concat_296_values0_0"), val = int32(1)];
+            int32 concat_296_values2_0 = const()[name = string("concat_296_values2_0"), val = int32(1024)];
+            int32 concat_296_axis_0 = const()[name = string("concat_296_axis_0"), val = int32(0)];
+            bool concat_296_interleave_0 = const()[name = string("concat_296_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_296 = concat(axis = concat_296_axis_0, interleave = concat_296_interleave_0, values = (concat_296_values0_0, end_step_29, concat_296_values2_0))[name = string("concat_296")];
+            tensor<int32, [3]> var_2947_begin_0 = const()[name = string("op_2947_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2947_end_mask_0 = const()[name = string("op_2947_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2947_cast_fp16 = slice_by_index(begin = var_2947_begin_0, end = concat_296, end_mask = var_2947_end_mask_0, x = k_cache_53_cast_fp16)[name = string("op_2947_cast_fp16")];
+            tensor<int32, [3]> var_2950_begin_0 = const()[name = string("op_2950_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2950_end_mask_0 = const()[name = string("op_2950_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_2950_cast_fp16 = slice_by_index(begin = var_2950_begin_0, end = concat_296, end_mask = var_2950_end_mask_0, x = v_cache_53_cast_fp16)[name = string("op_2950_cast_fp16")];
+            tensor<int32, [4]> concat_298x = const()[name = string("concat_298x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2960_cast_fp16 = reshape(shape = concat_298x, x = linear_104_cast_fp16)[name = string("op_2960_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_107_cast_fp16 = mul(x = var_2960_cast_fp16, y = const_172_to_fp16)[name = string("q_107_cast_fp16")];
+            tensor<int32, [4]> concat_299x = const()[name = string("concat_299x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2967_cast_fp16 = reshape(shape = concat_299x, x = var_2947_cast_fp16)[name = string("op_2967_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_135_cast_fp16 = mul(x = var_2967_cast_fp16, y = const_173_to_fp16)[name = string("k_135_cast_fp16")];
+            tensor<int32, [4]> concat_300x = const()[name = string("concat_300x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_2974_cast_fp16 = reshape(shape = concat_300x, x = var_2950_cast_fp16)[name = string("op_2974_cast_fp16")];
+            tensor<int32, [4]> var_2975 = const()[name = string("op_2975"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_79_transpose_x_0 = const()[name = string("qk_79_transpose_x_0"), val = bool(false)];
+            bool qk_79_transpose_y_0 = const()[name = string("qk_79_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_245_perm_0 = const()[name = string("transpose_245_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_246_perm_0 = const()[name = string("transpose_246_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_246 = transpose(perm = transpose_246_perm_0, x = k_135_cast_fp16)[name = string("transpose_374")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_245 = transpose(perm = transpose_245_perm_0, x = q_107_cast_fp16)[name = string("transpose_375")];
+            tensor<fp16, [1, 16, ?, ?]> qk_79_cast_fp16 = matmul(transpose_x = qk_79_transpose_x_0, transpose_y = qk_79_transpose_y_0, x = transpose_245, y = transpose_246)[name = string("qk_79_cast_fp16")];
+            int32 concat_301_values1_0 = const()[name = string("concat_301_values1_0"), val = int32(448)];
+            int32 concat_301_axis_0 = const()[name = string("concat_301_axis_0"), val = int32(0)];
+            bool concat_301_interleave_0 = const()[name = string("concat_301_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_301 = concat(axis = concat_301_axis_0, interleave = concat_301_interleave_0, values = (gather_158_cast_uint16_to_int32, concat_301_values1_0))[name = string("concat_301")];
+            tensor<int32, [2]> var_2978_begin_0 = const()[name = string("op_2978_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2978_end_mask_0 = const()[name = string("op_2978_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2978_cast_fp16 = slice_by_index(begin = var_2978_begin_0, end = concat_301, end_mask = var_2978_end_mask_0, x = mask_to_fp16)[name = string("op_2978_cast_fp16")];
+            int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(0)];
+            int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)];
+            bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, gather_158_cast_uint16_to_int32))[name = string("concat_302")];
+            tensor<int32, [2]> var_2979_begin_0 = const()[name = string("op_2979_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2979_end_mask_0 = const()[name = string("op_2979_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2979_cast_fp16 = slice_by_index(begin = var_2979_begin_0, end = concat_302, end_mask = var_2979_end_mask_0, x = var_2978_cast_fp16)[name = string("op_2979_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_81_cast_fp16 = add(x = qk_79_cast_fp16, y = var_2979_cast_fp16)[name = string("qk_81_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_2982_cast_fp16 = softmax(axis = var_2891, x = qk_81_cast_fp16)[name = string("op_2982_cast_fp16")];
+            bool var_2984_transpose_x_0 = const()[name = string("op_2984_transpose_x_0"), val = bool(false)];
+            bool var_2984_transpose_y_0 = const()[name = string("op_2984_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_135_cast_fp16 = transpose(perm = var_2975, x = var_2974_cast_fp16)[name = string("transpose_376")];
+            tensor<fp16, [1, 16, ?, 64]> var_2984_cast_fp16 = matmul(transpose_x = var_2984_transpose_x_0, transpose_y = var_2984_transpose_y_0, x = var_2982_cast_fp16, y = v_135_cast_fp16)[name = string("op_2984_cast_fp16")];
+            tensor<int32, [4]> var_2985 = const()[name = string("op_2985"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_303x = const()[name = string("concat_303x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_2986_cast_fp16 = transpose(perm = var_2985, x = var_2984_cast_fp16)[name = string("transpose_373")];
+            tensor<fp16, [1, ?, 1024]> x_241_cast_fp16 = reshape(shape = concat_303x, x = var_2986_cast_fp16)[name = string("x_241_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2990_to_fp16 = const()[name = string("op_2990_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499038080)))];
+            tensor<fp16, [1024]> var_2991_to_fp16 = const()[name = string("op_2991_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501135296)))];
+            tensor<fp16, [1, ?, 1024]> linear_107_cast_fp16 = linear(bias = var_2991_to_fp16, weight = var_2990_to_fp16, x = x_241_cast_fp16)[name = string("linear_107_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_243_cast_fp16 = add(x = x_237_cast_fp16, y = linear_107_cast_fp16)[name = string("x_243_cast_fp16")];
+            tensor<int32, [1]> var_2998_axes_0 = const()[name = string("op_2998_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_13_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501137408)))];
+            tensor<fp16, [1024]> blocks_13_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501139520)))];
+            tensor<fp16, [1, ?, 1024]> var_2998_cast_fp16 = layer_norm(axes = var_2998_axes_0, beta = blocks_13_cross_attn_ln_bias_to_fp16, epsilon = var_2897_to_fp16, gamma = blocks_13_cross_attn_ln_weight_to_fp16, x = x_243_cast_fp16)[name = string("op_2998_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3007_to_fp16 = const()[name = string("op_3007_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501141632)))];
+            tensor<fp16, [1024]> var_3008_to_fp16 = const()[name = string("op_3008_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503238848)))];
+            tensor<fp16, [1, ?, 1024]> linear_108_cast_fp16 = linear(bias = var_3008_to_fp16, weight = var_3007_to_fp16, x = var_2998_cast_fp16)[name = string("linear_108_cast_fp16")];
+            tensor<int32, [3]> concat_304 = const()[name = string("concat_304"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_305 = const()[name = string("concat_305"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_137_internal_tensor_assign_1_stride_0 = const()[name = string("k_137_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_304, begin_mask = k_137_internal_tensor_assign_1_begin_mask_0, end = concat_305, end_mask = k_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_137_internal_tensor_assign_1_squeeze_mask_0, stride = k_137_internal_tensor_assign_1_stride_0, update = k_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("k_137_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_306 = const()[name = string("concat_306"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_307 = const()[name = string("concat_307"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_137_internal_tensor_assign_1_stride_0 = const()[name = string("v_137_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_306, begin_mask = v_137_internal_tensor_assign_1_begin_mask_0, end = concat_307, end_mask = v_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_137_internal_tensor_assign_1_squeeze_mask_0, stride = v_137_internal_tensor_assign_1_stride_0, update = v_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("v_137_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_308x = const()[name = string("concat_308x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3028_cast_fp16 = reshape(shape = concat_308x, x = linear_108_cast_fp16)[name = string("op_3028_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_111_cast_fp16 = mul(x = var_3028_cast_fp16, y = const_174_to_fp16)[name = string("q_111_cast_fp16")];
+            tensor<int32, [4]> var_3034 = const()[name = string("op_3034"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3035_cast_fp16 = reshape(shape = var_3034, x = k_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3035_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_139_cast_fp16 = mul(x = var_3035_cast_fp16, y = const_175_to_fp16)[name = string("k_139_cast_fp16")];
+            tensor<int32, [4]> var_3041 = const()[name = string("op_3041"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3042_cast_fp16 = reshape(shape = var_3041, x = v_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3042_cast_fp16")];
+            tensor<int32, [4]> var_3043 = const()[name = string("op_3043"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_83_transpose_x_0 = const()[name = string("qk_83_transpose_x_0"), val = bool(false)];
+            bool qk_83_transpose_y_0 = const()[name = string("qk_83_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_247_perm_0 = const()[name = string("transpose_247_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_248_perm_0 = const()[name = string("transpose_248_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_248 = transpose(perm = transpose_248_perm_0, x = k_139_cast_fp16)[name = string("transpose_370")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_247 = transpose(perm = transpose_247_perm_0, x = q_111_cast_fp16)[name = string("transpose_371")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_83_cast_fp16 = matmul(transpose_x = qk_83_transpose_x_0, transpose_y = qk_83_transpose_y_0, x = transpose_247, y = transpose_248)[name = string("qk_83_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_3047_cast_fp16 = softmax(axis = var_2891, x = qk_83_cast_fp16)[name = string("op_3047_cast_fp16")];
+            bool var_3049_transpose_x_0 = const()[name = string("op_3049_transpose_x_0"), val = bool(false)];
+            bool var_3049_transpose_y_0 = const()[name = string("op_3049_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_139_cast_fp16 = transpose(perm = var_3043, x = var_3042_cast_fp16)[name = string("transpose_372")];
+            tensor<fp16, [1, 16, ?, 64]> var_3049_cast_fp16 = matmul(transpose_x = var_3049_transpose_x_0, transpose_y = var_3049_transpose_y_0, x = var_3047_cast_fp16, y = v_139_cast_fp16)[name = string("op_3049_cast_fp16")];
+            tensor<int32, [4]> var_3050 = const()[name = string("op_3050"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_309x = const()[name = string("concat_309x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_3051_cast_fp16 = transpose(perm = var_3050, x = var_3049_cast_fp16)[name = string("transpose_369")];
+            tensor<fp16, [1, ?, 1024]> x_247_cast_fp16 = reshape(shape = concat_309x, x = var_3051_cast_fp16)[name = string("x_247_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3055_to_fp16 = const()[name = string("op_3055_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503240960)))];
+            tensor<fp16, [1024]> var_3056_to_fp16 = const()[name = string("op_3056_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505338176)))];
+            tensor<fp16, [1, ?, 1024]> linear_109_cast_fp16 = linear(bias = var_3056_to_fp16, weight = var_3055_to_fp16, x = x_247_cast_fp16)[name = string("linear_109_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_249_cast_fp16 = add(x = x_243_cast_fp16, y = linear_109_cast_fp16)[name = string("x_249_cast_fp16")];
+            tensor<int32, [1]> var_3063_axes_0 = const()[name = string("op_3063_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505340288)))];
+            tensor<fp16, [1024]> blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505342400)))];
+            tensor<fp16, [1, ?, 1024]> var_3063_cast_fp16 = layer_norm(axes = var_3063_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_2897_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_249_cast_fp16)[name = string("op_3063_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_3072_to_fp16 = const()[name = string("op_3072_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505344512)))];
+            tensor<fp16, [4096]> var_3073_to_fp16 = const()[name = string("op_3073_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513733184)))];
+            tensor<fp16, [1, ?, 4096]> linear_110_cast_fp16 = linear(bias = var_3073_to_fp16, weight = var_3072_to_fp16, x = var_3063_cast_fp16)[name = string("linear_110_cast_fp16")];
+            string x_253_mode_0 = const()[name = string("x_253_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_253_cast_fp16 = gelu(mode = x_253_mode_0, x = linear_110_cast_fp16)[name = string("x_253_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_3078_to_fp16 = const()[name = string("op_3078_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513741440)))];
+            tensor<fp16, [1024]> var_3079_to_fp16 = const()[name = string("op_3079_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522130112)))];
+            tensor<fp16, [1, ?, 1024]> linear_111_cast_fp16 = linear(bias = var_3079_to_fp16, weight = var_3078_to_fp16, x = x_253_cast_fp16)[name = string("linear_111_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_255_cast_fp16 = add(x = x_249_cast_fp16, y = linear_111_cast_fp16)[name = string("x_255_cast_fp16")];
+            tensor<int32, [4]> k_cache_57_begin_0 = const()[name = string("k_cache_57_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_57_end_0 = const()[name = string("k_cache_57_end_0"), val = tensor<int32, [4]>([15, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_57_end_mask_0 = const()[name = string("k_cache_57_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_57_squeeze_mask_0 = const()[name = string("k_cache_57_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_57_cast_fp16 = slice_by_index(begin = k_cache_57_begin_0, end = k_cache_57_end_0, end_mask = k_cache_57_end_mask_0, squeeze_mask = k_cache_57_squeeze_mask_0, x = coreml_update_state_74)[name = string("k_cache_57_cast_fp16")];
+            tensor<int32, [4]> v_cache_57_begin_0 = const()[name = string("v_cache_57_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_57_end_0 = const()[name = string("v_cache_57_end_0"), val = tensor<int32, [4]>([15, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_57_end_mask_0 = const()[name = string("v_cache_57_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_57_squeeze_mask_0 = const()[name = string("v_cache_57_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_57_cast_fp16 = slice_by_index(begin = v_cache_57_begin_0, end = v_cache_57_end_0, end_mask = v_cache_57_end_mask_0, squeeze_mask = v_cache_57_squeeze_mask_0, x = coreml_update_state_75)[name = string("v_cache_57_cast_fp16")];
+            tensor<int32, [4]> k_cache_59_begin_0 = const()[name = string("k_cache_59_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_59_end_0 = const()[name = string("k_cache_59_end_0"), val = tensor<int32, [4]>([15, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_59_end_mask_0 = const()[name = string("k_cache_59_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_59_squeeze_mask_0 = const()[name = string("k_cache_59_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_59_cast_fp16 = slice_by_index(begin = k_cache_59_begin_0, end = k_cache_59_end_0, end_mask = k_cache_59_end_mask_0, squeeze_mask = k_cache_59_squeeze_mask_0, x = read_state_2)[name = string("k_cache_59_cast_fp16")];
+            tensor<int32, [4]> v_cache_59_begin_0 = const()[name = string("v_cache_59_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_59_end_0 = const()[name = string("v_cache_59_end_0"), val = tensor<int32, [4]>([15, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_59_end_mask_0 = const()[name = string("v_cache_59_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_59_squeeze_mask_0 = const()[name = string("v_cache_59_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_59_cast_fp16 = slice_by_index(begin = v_cache_59_begin_0, end = v_cache_59_end_0, end_mask = v_cache_59_end_mask_0, squeeze_mask = v_cache_59_squeeze_mask_0, x = read_state_3)[name = string("v_cache_59_cast_fp16")];
+            int32 var_3102 = const()[name = string("op_3102"), val = int32(-1)];
+            tensor<int32, [1]> var_3120_axes_0 = const()[name = string("op_3120_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522132224)))];
+            tensor<fp16, [1024]> blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522134336)))];
+            fp16 var_3108_to_fp16 = const()[name = string("op_3108_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_3120_cast_fp16 = layer_norm(axes = var_3120_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_3108_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_255_cast_fp16)[name = string("op_3120_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3131_to_fp16 = const()[name = string("op_3131_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522136448)))];
+            tensor<fp16, [1024]> var_3132_to_fp16 = const()[name = string("op_3132_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524233664)))];
+            tensor<fp16, [1, ?, 1024]> linear_112_cast_fp16 = linear(bias = var_3132_to_fp16, weight = var_3131_to_fp16, x = var_3120_cast_fp16)[name = string("linear_112_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3135_to_fp16 = const()[name = string("op_3135_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524235776)))];
+            tensor<fp16, [1, ?, 1024]> linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3135_to_fp16, x = var_3120_cast_fp16)[name = string("linear_113_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3139_to_fp16 = const()[name = string("op_3139_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526332992)))];
+            tensor<fp16, [1024]> var_3140_to_fp16 = const()[name = string("op_3140_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528430208)))];
+            tensor<fp16, [1, ?, 1024]> linear_114_cast_fp16 = linear(bias = var_3140_to_fp16, weight = var_3139_to_fp16, x = var_3120_cast_fp16)[name = string("linear_114_cast_fp16")];
+            tensor<int32, [3]> var_3142_shape_cast_fp16 = shape(x = linear_112_cast_fp16)[name = string("op_3142_shape_cast_fp16")];
+            int32 gather_170_axis_0 = const()[name = string("gather_170_axis_0"), val = int32(0)];
+            int32 gather_170_batch_dims_0 = const()[name = string("gather_170_batch_dims_0"), val = int32(0)];
+            bool gather_170_validate_indices_0 = const()[name = string("gather_170_validate_indices_0"), val = bool(false)];
+            string var_3142_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3142_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_170_to_uint16 = const()[name = string("select_170_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3142_shape_cast_fp16_to_uint16 = cast(dtype = var_3142_shape_cast_fp16_to_uint16_dtype_0, x = var_3142_shape_cast_fp16)[name = string("cast_266")];
+            uint16 gather_170_cast_uint16 = gather(axis = gather_170_axis_0, batch_dims = gather_170_batch_dims_0, indices = select_170_to_uint16, validate_indices = gather_170_validate_indices_0, x = var_3142_shape_cast_fp16_to_uint16)[name = string("gather_170_cast_uint16")];
+            string gather_170_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_170_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_170_cast_uint16_to_int32 = cast(dtype = gather_170_cast_uint16_to_int32_dtype_0, x = gather_170_cast_uint16)[name = string("cast_265")];
+            int32 end_step_31 = add(x = offset, y = gather_170_cast_uint16_to_int32)[name = string("end_step_31")];
+            tensor<int32, [1]> expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = end_step_31)[name = string("expand_dims_227")];
+            tensor<int32, [1]> concat_312_values0_0 = const()[name = string("concat_312_values0_0"), val = tensor<int32, [1]>([14])];
+            int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)];
+            bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (concat_312_values0_0, expand_dims_224, expand_dims_1, expand_dims_226))[name = string("concat_312")];
+            tensor<int32, [1]> concat_313_values0_0 = const()[name = string("concat_313_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)];
+            bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (concat_313_values0_0, concat_313_values1_0, expand_dims_227, concat_313_values3_0))[name = string("concat_313")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = k_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = k_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_15_stride_0, update = linear_113_cast_fp16, x = coreml_update_state_74)[name = string("k_cache1_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_15_cast_fp16, input = k_cache1)[name = string("coreml_update_state_76_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_76 = read_state(input = k_cache1)[name = string("coreml_update_state_76")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_15_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = v_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = v_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_15_stride_0, update = linear_114_cast_fp16, x = coreml_update_state_75)[name = string("v_cache1_internal_tensor_assign_15_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_15_cast_fp16, input = v_cache1)[name = string("coreml_update_state_77_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_77 = read_state(input = v_cache1)[name = string("coreml_update_state_77")];
+            int32 concat_318_values0_0 = const()[name = string("concat_318_values0_0"), val = int32(1)];
+            int32 concat_318_values2_0 = const()[name = string("concat_318_values2_0"), val = int32(1024)];
+            int32 concat_318_axis_0 = const()[name = string("concat_318_axis_0"), val = int32(0)];
+            bool concat_318_interleave_0 = const()[name = string("concat_318_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_318 = concat(axis = concat_318_axis_0, interleave = concat_318_interleave_0, values = (concat_318_values0_0, end_step_31, concat_318_values2_0))[name = string("concat_318")];
+            tensor<int32, [3]> var_3158_begin_0 = const()[name = string("op_3158_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3158_end_mask_0 = const()[name = string("op_3158_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_3158_cast_fp16 = slice_by_index(begin = var_3158_begin_0, end = concat_318, end_mask = var_3158_end_mask_0, x = k_cache_57_cast_fp16)[name = string("op_3158_cast_fp16")];
+            tensor<int32, [3]> var_3161_begin_0 = const()[name = string("op_3161_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3161_end_mask_0 = const()[name = string("op_3161_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_3161_cast_fp16 = slice_by_index(begin = var_3161_begin_0, end = concat_318, end_mask = var_3161_end_mask_0, x = v_cache_57_cast_fp16)[name = string("op_3161_cast_fp16")];
+            tensor<int32, [4]> concat_320x = const()[name = string("concat_320x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3171_cast_fp16 = reshape(shape = concat_320x, x = linear_112_cast_fp16)[name = string("op_3171_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_115_cast_fp16 = mul(x = var_3171_cast_fp16, y = const_176_to_fp16)[name = string("q_115_cast_fp16")];
+            tensor<int32, [4]> concat_321x = const()[name = string("concat_321x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3178_cast_fp16 = reshape(shape = concat_321x, x = var_3158_cast_fp16)[name = string("op_3178_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_145_cast_fp16 = mul(x = var_3178_cast_fp16, y = const_177_to_fp16)[name = string("k_145_cast_fp16")];
+            tensor<int32, [4]> concat_322x = const()[name = string("concat_322x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3185_cast_fp16 = reshape(shape = concat_322x, x = var_3161_cast_fp16)[name = string("op_3185_cast_fp16")];
+            tensor<int32, [4]> var_3186 = const()[name = string("op_3186"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_85_transpose_x_0 = const()[name = string("qk_85_transpose_x_0"), val = bool(false)];
+            bool qk_85_transpose_y_0 = const()[name = string("qk_85_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_249_perm_0 = const()[name = string("transpose_249_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_250_perm_0 = const()[name = string("transpose_250_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_250 = transpose(perm = transpose_250_perm_0, x = k_145_cast_fp16)[name = string("transpose_366")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_249 = transpose(perm = transpose_249_perm_0, x = q_115_cast_fp16)[name = string("transpose_367")];
+            tensor<fp16, [1, 16, ?, ?]> qk_85_cast_fp16 = matmul(transpose_x = qk_85_transpose_x_0, transpose_y = qk_85_transpose_y_0, x = transpose_249, y = transpose_250)[name = string("qk_85_cast_fp16")];
+            int32 concat_323_values1_0 = const()[name = string("concat_323_values1_0"), val = int32(448)];
+            int32 concat_323_axis_0 = const()[name = string("concat_323_axis_0"), val = int32(0)];
+            bool concat_323_interleave_0 = const()[name = string("concat_323_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_323 = concat(axis = concat_323_axis_0, interleave = concat_323_interleave_0, values = (gather_170_cast_uint16_to_int32, concat_323_values1_0))[name = string("concat_323")];
+            tensor<int32, [2]> var_3189_begin_0 = const()[name = string("op_3189_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3189_end_mask_0 = const()[name = string("op_3189_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3189_cast_fp16 = slice_by_index(begin = var_3189_begin_0, end = concat_323, end_mask = var_3189_end_mask_0, x = mask_to_fp16)[name = string("op_3189_cast_fp16")];
+            int32 concat_324_values0_0 = const()[name = string("concat_324_values0_0"), val = int32(0)];
+            int32 concat_324_axis_0 = const()[name = string("concat_324_axis_0"), val = int32(0)];
+            bool concat_324_interleave_0 = const()[name = string("concat_324_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_324 = concat(axis = concat_324_axis_0, interleave = concat_324_interleave_0, values = (concat_324_values0_0, gather_170_cast_uint16_to_int32))[name = string("concat_324")];
+            tensor<int32, [2]> var_3190_begin_0 = const()[name = string("op_3190_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3190_end_mask_0 = const()[name = string("op_3190_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3190_cast_fp16 = slice_by_index(begin = var_3190_begin_0, end = concat_324, end_mask = var_3190_end_mask_0, x = var_3189_cast_fp16)[name = string("op_3190_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_87_cast_fp16 = add(x = qk_85_cast_fp16, y = var_3190_cast_fp16)[name = string("qk_87_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_3193_cast_fp16 = softmax(axis = var_3102, x = qk_87_cast_fp16)[name = string("op_3193_cast_fp16")];
+            bool var_3195_transpose_x_0 = const()[name = string("op_3195_transpose_x_0"), val = bool(false)];
+            bool var_3195_transpose_y_0 = const()[name = string("op_3195_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_145_cast_fp16 = transpose(perm = var_3186, x = var_3185_cast_fp16)[name = string("transpose_368")];
+            tensor<fp16, [1, 16, ?, 64]> var_3195_cast_fp16 = matmul(transpose_x = var_3195_transpose_x_0, transpose_y = var_3195_transpose_y_0, x = var_3193_cast_fp16, y = v_145_cast_fp16)[name = string("op_3195_cast_fp16")];
+            tensor<int32, [4]> var_3196 = const()[name = string("op_3196"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_325x = const()[name = string("concat_325x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_3197_cast_fp16 = transpose(perm = var_3196, x = var_3195_cast_fp16)[name = string("transpose_365")];
+            tensor<fp16, [1, ?, 1024]> x_259_cast_fp16 = reshape(shape = concat_325x, x = var_3197_cast_fp16)[name = string("x_259_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3201_to_fp16 = const()[name = string("op_3201_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528432320)))];
+            tensor<fp16, [1024]> var_3202_to_fp16 = const()[name = string("op_3202_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530529536)))];
+            tensor<fp16, [1, ?, 1024]> linear_115_cast_fp16 = linear(bias = var_3202_to_fp16, weight = var_3201_to_fp16, x = x_259_cast_fp16)[name = string("linear_115_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_261_cast_fp16 = add(x = x_255_cast_fp16, y = linear_115_cast_fp16)[name = string("x_261_cast_fp16")];
+            tensor<int32, [1]> var_3209_axes_0 = const()[name = string("op_3209_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_14_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530531648)))];
+            tensor<fp16, [1024]> blocks_14_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530533760)))];
+            tensor<fp16, [1, ?, 1024]> var_3209_cast_fp16 = layer_norm(axes = var_3209_axes_0, beta = blocks_14_cross_attn_ln_bias_to_fp16, epsilon = var_3108_to_fp16, gamma = blocks_14_cross_attn_ln_weight_to_fp16, x = x_261_cast_fp16)[name = string("op_3209_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3218_to_fp16 = const()[name = string("op_3218_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530535872)))];
+            tensor<fp16, [1024]> var_3219_to_fp16 = const()[name = string("op_3219_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532633088)))];
+            tensor<fp16, [1, ?, 1024]> linear_116_cast_fp16 = linear(bias = var_3219_to_fp16, weight = var_3218_to_fp16, x = var_3209_cast_fp16)[name = string("linear_116_cast_fp16")];
+            tensor<int32, [3]> concat_326 = const()[name = string("concat_326"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_327 = const()[name = string("concat_327"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_147_internal_tensor_assign_1_stride_0 = const()[name = string("k_147_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_326, begin_mask = k_147_internal_tensor_assign_1_begin_mask_0, end = concat_327, end_mask = k_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_147_internal_tensor_assign_1_squeeze_mask_0, stride = k_147_internal_tensor_assign_1_stride_0, update = k_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("k_147_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_328 = const()[name = string("concat_328"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_329 = const()[name = string("concat_329"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_147_internal_tensor_assign_1_stride_0 = const()[name = string("v_147_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_328, begin_mask = v_147_internal_tensor_assign_1_begin_mask_0, end = concat_329, end_mask = v_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_147_internal_tensor_assign_1_squeeze_mask_0, stride = v_147_internal_tensor_assign_1_stride_0, update = v_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("v_147_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_330x = const()[name = string("concat_330x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3239_cast_fp16 = reshape(shape = concat_330x, x = linear_116_cast_fp16)[name = string("op_3239_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_119_cast_fp16 = mul(x = var_3239_cast_fp16, y = const_178_to_fp16)[name = string("q_119_cast_fp16")];
+            tensor<int32, [4]> var_3245 = const()[name = string("op_3245"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3246_cast_fp16 = reshape(shape = var_3245, x = k_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3246_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_149_cast_fp16 = mul(x = var_3246_cast_fp16, y = const_179_to_fp16)[name = string("k_149_cast_fp16")];
+            tensor<int32, [4]> var_3252 = const()[name = string("op_3252"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3253_cast_fp16 = reshape(shape = var_3252, x = v_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3253_cast_fp16")];
+            tensor<int32, [4]> var_3254 = const()[name = string("op_3254"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_89_transpose_x_0 = const()[name = string("qk_89_transpose_x_0"), val = bool(false)];
+            bool qk_89_transpose_y_0 = const()[name = string("qk_89_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_251_perm_0 = const()[name = string("transpose_251_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_252_perm_0 = const()[name = string("transpose_252_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_252 = transpose(perm = transpose_252_perm_0, x = k_149_cast_fp16)[name = string("transpose_362")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_251 = transpose(perm = transpose_251_perm_0, x = q_119_cast_fp16)[name = string("transpose_363")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_89_cast_fp16 = matmul(transpose_x = qk_89_transpose_x_0, transpose_y = qk_89_transpose_y_0, x = transpose_251, y = transpose_252)[name = string("qk_89_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_3258_cast_fp16 = softmax(axis = var_3102, x = qk_89_cast_fp16)[name = string("op_3258_cast_fp16")];
+            bool var_3260_transpose_x_0 = const()[name = string("op_3260_transpose_x_0"), val = bool(false)];
+            bool var_3260_transpose_y_0 = const()[name = string("op_3260_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_149_cast_fp16 = transpose(perm = var_3254, x = var_3253_cast_fp16)[name = string("transpose_364")];
+            tensor<fp16, [1, 16, ?, 64]> var_3260_cast_fp16 = matmul(transpose_x = var_3260_transpose_x_0, transpose_y = var_3260_transpose_y_0, x = var_3258_cast_fp16, y = v_149_cast_fp16)[name = string("op_3260_cast_fp16")];
+            tensor<int32, [4]> var_3261 = const()[name = string("op_3261"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_331x = const()[name = string("concat_331x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_3262_cast_fp16 = transpose(perm = var_3261, x = var_3260_cast_fp16)[name = string("transpose_361")];
+            tensor<fp16, [1, ?, 1024]> x_265_cast_fp16 = reshape(shape = concat_331x, x = var_3262_cast_fp16)[name = string("x_265_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3266_to_fp16 = const()[name = string("op_3266_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532635200)))];
+            tensor<fp16, [1024]> var_3267_to_fp16 = const()[name = string("op_3267_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534732416)))];
+            tensor<fp16, [1, ?, 1024]> linear_117_cast_fp16 = linear(bias = var_3267_to_fp16, weight = var_3266_to_fp16, x = x_265_cast_fp16)[name = string("linear_117_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_267_cast_fp16 = add(x = x_261_cast_fp16, y = linear_117_cast_fp16)[name = string("x_267_cast_fp16")];
+            tensor<int32, [1]> var_3274_axes_0 = const()[name = string("op_3274_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534734528)))];
+            tensor<fp16, [1024]> blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534736640)))];
+            tensor<fp16, [1, ?, 1024]> var_3274_cast_fp16 = layer_norm(axes = var_3274_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_3108_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_267_cast_fp16)[name = string("op_3274_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_3283_to_fp16 = const()[name = string("op_3283_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534738752)))];
+            tensor<fp16, [4096]> var_3284_to_fp16 = const()[name = string("op_3284_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543127424)))];
+            tensor<fp16, [1, ?, 4096]> linear_118_cast_fp16 = linear(bias = var_3284_to_fp16, weight = var_3283_to_fp16, x = var_3274_cast_fp16)[name = string("linear_118_cast_fp16")];
+            string x_271_mode_0 = const()[name = string("x_271_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_271_cast_fp16 = gelu(mode = x_271_mode_0, x = linear_118_cast_fp16)[name = string("x_271_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_3289_to_fp16 = const()[name = string("op_3289_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543135680)))];
+            tensor<fp16, [1024]> var_3290_to_fp16 = const()[name = string("op_3290_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551524352)))];
+            tensor<fp16, [1, ?, 1024]> linear_119_cast_fp16 = linear(bias = var_3290_to_fp16, weight = var_3289_to_fp16, x = x_271_cast_fp16)[name = string("linear_119_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_273_cast_fp16 = add(x = x_267_cast_fp16, y = linear_119_cast_fp16)[name = string("x_273_cast_fp16")];
+            tensor<int32, [4]> k_cache_61_begin_0 = const()[name = string("k_cache_61_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_61_end_0 = const()[name = string("k_cache_61_end_0"), val = tensor<int32, [4]>([16, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_61_end_mask_0 = const()[name = string("k_cache_61_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_61_squeeze_mask_0 = const()[name = string("k_cache_61_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_61_cast_fp16 = slice_by_index(begin = k_cache_61_begin_0, end = k_cache_61_end_0, end_mask = k_cache_61_end_mask_0, squeeze_mask = k_cache_61_squeeze_mask_0, x = coreml_update_state_76)[name = string("k_cache_61_cast_fp16")];
+            tensor<int32, [4]> v_cache_61_begin_0 = const()[name = string("v_cache_61_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_61_end_0 = const()[name = string("v_cache_61_end_0"), val = tensor<int32, [4]>([16, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_61_end_mask_0 = const()[name = string("v_cache_61_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_61_squeeze_mask_0 = const()[name = string("v_cache_61_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_61_cast_fp16 = slice_by_index(begin = v_cache_61_begin_0, end = v_cache_61_end_0, end_mask = v_cache_61_end_mask_0, squeeze_mask = v_cache_61_squeeze_mask_0, x = coreml_update_state_77)[name = string("v_cache_61_cast_fp16")];
+            tensor<int32, [4]> k_cache_63_begin_0 = const()[name = string("k_cache_63_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_63_end_0 = const()[name = string("k_cache_63_end_0"), val = tensor<int32, [4]>([16, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_63_end_mask_0 = const()[name = string("k_cache_63_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_63_squeeze_mask_0 = const()[name = string("k_cache_63_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_63_cast_fp16 = slice_by_index(begin = k_cache_63_begin_0, end = k_cache_63_end_0, end_mask = k_cache_63_end_mask_0, squeeze_mask = k_cache_63_squeeze_mask_0, x = read_state_2)[name = string("k_cache_63_cast_fp16")];
+            tensor<int32, [4]> v_cache_63_begin_0 = const()[name = string("v_cache_63_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_63_end_0 = const()[name = string("v_cache_63_end_0"), val = tensor<int32, [4]>([16, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_63_end_mask_0 = const()[name = string("v_cache_63_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_63_squeeze_mask_0 = const()[name = string("v_cache_63_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_63_cast_fp16 = slice_by_index(begin = v_cache_63_begin_0, end = v_cache_63_end_0, end_mask = v_cache_63_end_mask_0, squeeze_mask = v_cache_63_squeeze_mask_0, x = read_state_3)[name = string("v_cache_63_cast_fp16")];
+            int32 var_3313 = const()[name = string("op_3313"), val = int32(-1)];
+            tensor<int32, [1]> var_3331_axes_0 = const()[name = string("op_3331_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551526464)))];
+            tensor<fp16, [1024]> blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551528576)))];
+            fp16 var_3319_to_fp16 = const()[name = string("op_3319_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_3331_cast_fp16 = layer_norm(axes = var_3331_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_3319_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_273_cast_fp16)[name = string("op_3331_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3342_to_fp16 = const()[name = string("op_3342_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551530688)))];
+            tensor<fp16, [1024]> var_3343_to_fp16 = const()[name = string("op_3343_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553627904)))];
+            tensor<fp16, [1, ?, 1024]> linear_120_cast_fp16 = linear(bias = var_3343_to_fp16, weight = var_3342_to_fp16, x = var_3331_cast_fp16)[name = string("linear_120_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3346_to_fp16 = const()[name = string("op_3346_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553630016)))];
+            tensor<fp16, [1, ?, 1024]> linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3346_to_fp16, x = var_3331_cast_fp16)[name = string("linear_121_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3350_to_fp16 = const()[name = string("op_3350_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555727232)))];
+            tensor<fp16, [1024]> var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557824448)))];
+            tensor<fp16, [1, ?, 1024]> linear_122_cast_fp16 = linear(bias = var_3351_to_fp16, weight = var_3350_to_fp16, x = var_3331_cast_fp16)[name = string("linear_122_cast_fp16")];
+            tensor<int32, [3]> var_3353_shape_cast_fp16 = shape(x = linear_120_cast_fp16)[name = string("op_3353_shape_cast_fp16")];
+            int32 gather_182_axis_0 = const()[name = string("gather_182_axis_0"), val = int32(0)];
+            int32 gather_182_batch_dims_0 = const()[name = string("gather_182_batch_dims_0"), val = int32(0)];
+            bool gather_182_validate_indices_0 = const()[name = string("gather_182_validate_indices_0"), val = bool(false)];
+            string var_3353_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3353_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_182_to_uint16 = const()[name = string("select_182_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3353_shape_cast_fp16_to_uint16 = cast(dtype = var_3353_shape_cast_fp16_to_uint16_dtype_0, x = var_3353_shape_cast_fp16)[name = string("cast_264")];
+            uint16 gather_182_cast_uint16 = gather(axis = gather_182_axis_0, batch_dims = gather_182_batch_dims_0, indices = select_182_to_uint16, validate_indices = gather_182_validate_indices_0, x = var_3353_shape_cast_fp16_to_uint16)[name = string("gather_182_cast_uint16")];
+            string gather_182_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_182_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_182_cast_uint16_to_int32 = cast(dtype = gather_182_cast_uint16_to_int32_dtype_0, x = gather_182_cast_uint16)[name = string("cast_263")];
+            int32 end_step_33 = add(x = offset, y = gather_182_cast_uint16_to_int32)[name = string("end_step_33")];
+            tensor<int32, [1]> expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = end_step_33)[name = string("expand_dims_243")];
+            tensor<int32, [1]> concat_334_values0_0 = const()[name = string("concat_334_values0_0"), val = tensor<int32, [1]>([15])];
+            int32 concat_334_axis_0 = const()[name = string("concat_334_axis_0"), val = int32(0)];
+            bool concat_334_interleave_0 = const()[name = string("concat_334_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_334 = concat(axis = concat_334_axis_0, interleave = concat_334_interleave_0, values = (concat_334_values0_0, expand_dims_240, expand_dims_1, expand_dims_242))[name = string("concat_334")];
+            tensor<int32, [1]> concat_335_values0_0 = const()[name = string("concat_335_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_335_values1_0 = const()[name = string("concat_335_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_335_values3_0 = const()[name = string("concat_335_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_335_axis_0 = const()[name = string("concat_335_axis_0"), val = int32(0)];
+            bool concat_335_interleave_0 = const()[name = string("concat_335_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_335 = concat(axis = concat_335_axis_0, interleave = concat_335_interleave_0, values = (concat_335_values0_0, concat_335_values1_0, expand_dims_243, concat_335_values3_0))[name = string("concat_335")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = k_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = k_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_16_stride_0, update = linear_121_cast_fp16, x = coreml_update_state_76)[name = string("k_cache1_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_16_cast_fp16, input = k_cache1)[name = string("coreml_update_state_78_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_78 = read_state(input = k_cache1)[name = string("coreml_update_state_78")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_16_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = v_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = v_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_16_stride_0, update = linear_122_cast_fp16, x = coreml_update_state_77)[name = string("v_cache1_internal_tensor_assign_16_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_16_cast_fp16, input = v_cache1)[name = string("coreml_update_state_79_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_79 = read_state(input = v_cache1)[name = string("coreml_update_state_79")];
+            int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)];
+            int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(1024)];
+            int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)];
+            bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, end_step_33, concat_340_values2_0))[name = string("concat_340")];
+            tensor<int32, [3]> var_3369_begin_0 = const()[name = string("op_3369_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3369_end_mask_0 = const()[name = string("op_3369_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_3369_cast_fp16 = slice_by_index(begin = var_3369_begin_0, end = concat_340, end_mask = var_3369_end_mask_0, x = k_cache_61_cast_fp16)[name = string("op_3369_cast_fp16")];
+            tensor<int32, [3]> var_3372_begin_0 = const()[name = string("op_3372_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3372_end_mask_0 = const()[name = string("op_3372_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_3372_cast_fp16 = slice_by_index(begin = var_3372_begin_0, end = concat_340, end_mask = var_3372_end_mask_0, x = v_cache_61_cast_fp16)[name = string("op_3372_cast_fp16")];
+            tensor<int32, [4]> concat_342x = const()[name = string("concat_342x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3382_cast_fp16 = reshape(shape = concat_342x, x = linear_120_cast_fp16)[name = string("op_3382_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_123_cast_fp16 = mul(x = var_3382_cast_fp16, y = const_180_to_fp16)[name = string("q_123_cast_fp16")];
+            tensor<int32, [4]> concat_343x = const()[name = string("concat_343x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3389_cast_fp16 = reshape(shape = concat_343x, x = var_3369_cast_fp16)[name = string("op_3389_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_155_cast_fp16 = mul(x = var_3389_cast_fp16, y = const_181_to_fp16)[name = string("k_155_cast_fp16")];
+            tensor<int32, [4]> concat_344x = const()[name = string("concat_344x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3396_cast_fp16 = reshape(shape = concat_344x, x = var_3372_cast_fp16)[name = string("op_3396_cast_fp16")];
+            tensor<int32, [4]> var_3397 = const()[name = string("op_3397"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_91_transpose_x_0 = const()[name = string("qk_91_transpose_x_0"), val = bool(false)];
+            bool qk_91_transpose_y_0 = const()[name = string("qk_91_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_253_perm_0 = const()[name = string("transpose_253_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_254_perm_0 = const()[name = string("transpose_254_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_254 = transpose(perm = transpose_254_perm_0, x = k_155_cast_fp16)[name = string("transpose_358")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_253 = transpose(perm = transpose_253_perm_0, x = q_123_cast_fp16)[name = string("transpose_359")];
+            tensor<fp16, [1, 16, ?, ?]> qk_91_cast_fp16 = matmul(transpose_x = qk_91_transpose_x_0, transpose_y = qk_91_transpose_y_0, x = transpose_253, y = transpose_254)[name = string("qk_91_cast_fp16")];
+            int32 concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = int32(448)];
+            int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)];
+            bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (gather_182_cast_uint16_to_int32, concat_345_values1_0))[name = string("concat_345")];
+            tensor<int32, [2]> var_3400_begin_0 = const()[name = string("op_3400_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3400_end_mask_0 = const()[name = string("op_3400_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3400_cast_fp16 = slice_by_index(begin = var_3400_begin_0, end = concat_345, end_mask = var_3400_end_mask_0, x = mask_to_fp16)[name = string("op_3400_cast_fp16")];
+            int32 concat_346_values0_0 = const()[name = string("concat_346_values0_0"), val = int32(0)];
+            int32 concat_346_axis_0 = const()[name = string("concat_346_axis_0"), val = int32(0)];
+            bool concat_346_interleave_0 = const()[name = string("concat_346_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_346 = concat(axis = concat_346_axis_0, interleave = concat_346_interleave_0, values = (concat_346_values0_0, gather_182_cast_uint16_to_int32))[name = string("concat_346")];
+            tensor<int32, [2]> var_3401_begin_0 = const()[name = string("op_3401_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3401_end_mask_0 = const()[name = string("op_3401_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3401_cast_fp16 = slice_by_index(begin = var_3401_begin_0, end = concat_346, end_mask = var_3401_end_mask_0, x = var_3400_cast_fp16)[name = string("op_3401_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_93_cast_fp16 = add(x = qk_91_cast_fp16, y = var_3401_cast_fp16)[name = string("qk_93_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_3404_cast_fp16 = softmax(axis = var_3313, x = qk_93_cast_fp16)[name = string("op_3404_cast_fp16")];
+            bool var_3406_transpose_x_0 = const()[name = string("op_3406_transpose_x_0"), val = bool(false)];
+            bool var_3406_transpose_y_0 = const()[name = string("op_3406_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_155_cast_fp16 = transpose(perm = var_3397, x = var_3396_cast_fp16)[name = string("transpose_360")];
+            tensor<fp16, [1, 16, ?, 64]> var_3406_cast_fp16 = matmul(transpose_x = var_3406_transpose_x_0, transpose_y = var_3406_transpose_y_0, x = var_3404_cast_fp16, y = v_155_cast_fp16)[name = string("op_3406_cast_fp16")];
+            tensor<int32, [4]> var_3407 = const()[name = string("op_3407"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_347x = const()[name = string("concat_347x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_3408_cast_fp16 = transpose(perm = var_3407, x = var_3406_cast_fp16)[name = string("transpose_357")];
+            tensor<fp16, [1, ?, 1024]> x_277_cast_fp16 = reshape(shape = concat_347x, x = var_3408_cast_fp16)[name = string("x_277_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3412_to_fp16 = const()[name = string("op_3412_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557826560)))];
+            tensor<fp16, [1024]> var_3413_to_fp16 = const()[name = string("op_3413_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559923776)))];
+            tensor<fp16, [1, ?, 1024]> linear_123_cast_fp16 = linear(bias = var_3413_to_fp16, weight = var_3412_to_fp16, x = x_277_cast_fp16)[name = string("linear_123_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_279_cast_fp16 = add(x = x_273_cast_fp16, y = linear_123_cast_fp16)[name = string("x_279_cast_fp16")];
+            tensor<int32, [1]> var_3420_axes_0 = const()[name = string("op_3420_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_15_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559925888)))];
+            tensor<fp16, [1024]> blocks_15_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559928000)))];
+            tensor<fp16, [1, ?, 1024]> var_3420_cast_fp16 = layer_norm(axes = var_3420_axes_0, beta = blocks_15_cross_attn_ln_bias_to_fp16, epsilon = var_3319_to_fp16, gamma = blocks_15_cross_attn_ln_weight_to_fp16, x = x_279_cast_fp16)[name = string("op_3420_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3429_to_fp16 = const()[name = string("op_3429_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559930112)))];
+            tensor<fp16, [1024]> var_3430_to_fp16 = const()[name = string("op_3430_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562027328)))];
+            tensor<fp16, [1, ?, 1024]> linear_124_cast_fp16 = linear(bias = var_3430_to_fp16, weight = var_3429_to_fp16, x = var_3420_cast_fp16)[name = string("linear_124_cast_fp16")];
+            tensor<int32, [3]> concat_348 = const()[name = string("concat_348"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_349 = const()[name = string("concat_349"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_157_internal_tensor_assign_1_stride_0 = const()[name = string("k_157_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_348, begin_mask = k_157_internal_tensor_assign_1_begin_mask_0, end = concat_349, end_mask = k_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_157_internal_tensor_assign_1_squeeze_mask_0, stride = k_157_internal_tensor_assign_1_stride_0, update = k_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("k_157_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_350 = const()[name = string("concat_350"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_351 = const()[name = string("concat_351"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_157_internal_tensor_assign_1_stride_0 = const()[name = string("v_157_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_350, begin_mask = v_157_internal_tensor_assign_1_begin_mask_0, end = concat_351, end_mask = v_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_157_internal_tensor_assign_1_squeeze_mask_0, stride = v_157_internal_tensor_assign_1_stride_0, update = v_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("v_157_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_352x = const()[name = string("concat_352x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3450_cast_fp16 = reshape(shape = concat_352x, x = linear_124_cast_fp16)[name = string("op_3450_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_127_cast_fp16 = mul(x = var_3450_cast_fp16, y = const_182_to_fp16)[name = string("q_127_cast_fp16")];
+            tensor<int32, [4]> var_3456 = const()[name = string("op_3456"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3457_cast_fp16 = reshape(shape = var_3456, x = k_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3457_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_159_cast_fp16 = mul(x = var_3457_cast_fp16, y = const_183_to_fp16)[name = string("k_159_cast_fp16")];
+            tensor<int32, [4]> var_3463 = const()[name = string("op_3463"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3464_cast_fp16 = reshape(shape = var_3463, x = v_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3464_cast_fp16")];
+            tensor<int32, [4]> var_3465 = const()[name = string("op_3465"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_95_transpose_x_0 = const()[name = string("qk_95_transpose_x_0"), val = bool(false)];
+            bool qk_95_transpose_y_0 = const()[name = string("qk_95_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_255_perm_0 = const()[name = string("transpose_255_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_256_perm_0 = const()[name = string("transpose_256_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_256 = transpose(perm = transpose_256_perm_0, x = k_159_cast_fp16)[name = string("transpose_354")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_255 = transpose(perm = transpose_255_perm_0, x = q_127_cast_fp16)[name = string("transpose_355")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_95_cast_fp16 = matmul(transpose_x = qk_95_transpose_x_0, transpose_y = qk_95_transpose_y_0, x = transpose_255, y = transpose_256)[name = string("qk_95_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_3469_cast_fp16 = softmax(axis = var_3313, x = qk_95_cast_fp16)[name = string("op_3469_cast_fp16")];
+            bool var_3471_transpose_x_0 = const()[name = string("op_3471_transpose_x_0"), val = bool(false)];
+            bool var_3471_transpose_y_0 = const()[name = string("op_3471_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_159_cast_fp16 = transpose(perm = var_3465, x = var_3464_cast_fp16)[name = string("transpose_356")];
+            tensor<fp16, [1, 16, ?, 64]> var_3471_cast_fp16 = matmul(transpose_x = var_3471_transpose_x_0, transpose_y = var_3471_transpose_y_0, x = var_3469_cast_fp16, y = v_159_cast_fp16)[name = string("op_3471_cast_fp16")];
+            tensor<int32, [4]> var_3472 = const()[name = string("op_3472"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_353x = const()[name = string("concat_353x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_3473_cast_fp16 = transpose(perm = var_3472, x = var_3471_cast_fp16)[name = string("transpose_353")];
+            tensor<fp16, [1, ?, 1024]> x_283_cast_fp16 = reshape(shape = concat_353x, x = var_3473_cast_fp16)[name = string("x_283_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3477_to_fp16 = const()[name = string("op_3477_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562029440)))];
+            tensor<fp16, [1024]> var_3478_to_fp16 = const()[name = string("op_3478_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564126656)))];
+            tensor<fp16, [1, ?, 1024]> linear_125_cast_fp16 = linear(bias = var_3478_to_fp16, weight = var_3477_to_fp16, x = x_283_cast_fp16)[name = string("linear_125_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_125_cast_fp16)[name = string("x_285_cast_fp16")];
+            tensor<int32, [1]> var_3485_axes_0 = const()[name = string("op_3485_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564128768)))];
+            tensor<fp16, [1024]> blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564130880)))];
+            tensor<fp16, [1, ?, 1024]> var_3485_cast_fp16 = layer_norm(axes = var_3485_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_3319_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_285_cast_fp16)[name = string("op_3485_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_3494_to_fp16 = const()[name = string("op_3494_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564132992)))];
+            tensor<fp16, [4096]> var_3495_to_fp16 = const()[name = string("op_3495_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572521664)))];
+            tensor<fp16, [1, ?, 4096]> linear_126_cast_fp16 = linear(bias = var_3495_to_fp16, weight = var_3494_to_fp16, x = var_3485_cast_fp16)[name = string("linear_126_cast_fp16")];
+            string x_289_mode_0 = const()[name = string("x_289_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_289_cast_fp16 = gelu(mode = x_289_mode_0, x = linear_126_cast_fp16)[name = string("x_289_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_3500_to_fp16 = const()[name = string("op_3500_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572529920)))];
+            tensor<fp16, [1024]> var_3501_to_fp16 = const()[name = string("op_3501_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580918592)))];
+            tensor<fp16, [1, ?, 1024]> linear_127_cast_fp16 = linear(bias = var_3501_to_fp16, weight = var_3500_to_fp16, x = x_289_cast_fp16)[name = string("linear_127_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_291_cast_fp16 = add(x = x_285_cast_fp16, y = linear_127_cast_fp16)[name = string("x_291_cast_fp16")];
+            tensor<int32, [4]> k_cache_65_begin_0 = const()[name = string("k_cache_65_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_65_end_0 = const()[name = string("k_cache_65_end_0"), val = tensor<int32, [4]>([17, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_65_end_mask_0 = const()[name = string("k_cache_65_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_65_squeeze_mask_0 = const()[name = string("k_cache_65_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_65_cast_fp16 = slice_by_index(begin = k_cache_65_begin_0, end = k_cache_65_end_0, end_mask = k_cache_65_end_mask_0, squeeze_mask = k_cache_65_squeeze_mask_0, x = coreml_update_state_78)[name = string("k_cache_65_cast_fp16")];
+            tensor<int32, [4]> v_cache_65_begin_0 = const()[name = string("v_cache_65_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_65_end_0 = const()[name = string("v_cache_65_end_0"), val = tensor<int32, [4]>([17, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_65_end_mask_0 = const()[name = string("v_cache_65_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_65_squeeze_mask_0 = const()[name = string("v_cache_65_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_65_cast_fp16 = slice_by_index(begin = v_cache_65_begin_0, end = v_cache_65_end_0, end_mask = v_cache_65_end_mask_0, squeeze_mask = v_cache_65_squeeze_mask_0, x = coreml_update_state_79)[name = string("v_cache_65_cast_fp16")];
+            tensor<int32, [4]> k_cache_67_begin_0 = const()[name = string("k_cache_67_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_67_end_0 = const()[name = string("k_cache_67_end_0"), val = tensor<int32, [4]>([17, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_67_end_mask_0 = const()[name = string("k_cache_67_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_67_squeeze_mask_0 = const()[name = string("k_cache_67_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_67_cast_fp16 = slice_by_index(begin = k_cache_67_begin_0, end = k_cache_67_end_0, end_mask = k_cache_67_end_mask_0, squeeze_mask = k_cache_67_squeeze_mask_0, x = read_state_2)[name = string("k_cache_67_cast_fp16")];
+            tensor<int32, [4]> v_cache_67_begin_0 = const()[name = string("v_cache_67_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_67_end_0 = const()[name = string("v_cache_67_end_0"), val = tensor<int32, [4]>([17, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_67_end_mask_0 = const()[name = string("v_cache_67_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_67_squeeze_mask_0 = const()[name = string("v_cache_67_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_67_cast_fp16 = slice_by_index(begin = v_cache_67_begin_0, end = v_cache_67_end_0, end_mask = v_cache_67_end_mask_0, squeeze_mask = v_cache_67_squeeze_mask_0, x = read_state_3)[name = string("v_cache_67_cast_fp16")];
+            int32 var_3524 = const()[name = string("op_3524"), val = int32(-1)];
+            tensor<int32, [1]> var_3542_axes_0 = const()[name = string("op_3542_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580920704)))];
+            tensor<fp16, [1024]> blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580922816)))];
+            fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_3542_cast_fp16 = layer_norm(axes = var_3542_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_3530_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_291_cast_fp16)[name = string("op_3542_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3553_to_fp16 = const()[name = string("op_3553_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580924928)))];
+            tensor<fp16, [1024]> var_3554_to_fp16 = const()[name = string("op_3554_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583022144)))];
+            tensor<fp16, [1, ?, 1024]> linear_128_cast_fp16 = linear(bias = var_3554_to_fp16, weight = var_3553_to_fp16, x = var_3542_cast_fp16)[name = string("linear_128_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3557_to_fp16 = const()[name = string("op_3557_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583024256)))];
+            tensor<fp16, [1, ?, 1024]> linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3557_to_fp16, x = var_3542_cast_fp16)[name = string("linear_129_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3561_to_fp16 = const()[name = string("op_3561_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585121472)))];
+            tensor<fp16, [1024]> var_3562_to_fp16 = const()[name = string("op_3562_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(587218688)))];
+            tensor<fp16, [1, ?, 1024]> linear_130_cast_fp16 = linear(bias = var_3562_to_fp16, weight = var_3561_to_fp16, x = var_3542_cast_fp16)[name = string("linear_130_cast_fp16")];
+            tensor<int32, [3]> var_3564_shape_cast_fp16 = shape(x = linear_128_cast_fp16)[name = string("op_3564_shape_cast_fp16")];
+            int32 gather_194_axis_0 = const()[name = string("gather_194_axis_0"), val = int32(0)];
+            int32 gather_194_batch_dims_0 = const()[name = string("gather_194_batch_dims_0"), val = int32(0)];
+            bool gather_194_validate_indices_0 = const()[name = string("gather_194_validate_indices_0"), val = bool(false)];
+            string var_3564_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3564_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_194_to_uint16 = const()[name = string("select_194_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3564_shape_cast_fp16_to_uint16 = cast(dtype = var_3564_shape_cast_fp16_to_uint16_dtype_0, x = var_3564_shape_cast_fp16)[name = string("cast_262")];
+            uint16 gather_194_cast_uint16 = gather(axis = gather_194_axis_0, batch_dims = gather_194_batch_dims_0, indices = select_194_to_uint16, validate_indices = gather_194_validate_indices_0, x = var_3564_shape_cast_fp16_to_uint16)[name = string("gather_194_cast_uint16")];
+            string gather_194_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_194_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_194_cast_uint16_to_int32 = cast(dtype = gather_194_cast_uint16_to_int32_dtype_0, x = gather_194_cast_uint16)[name = string("cast_261")];
+            int32 end_step_35 = add(x = offset, y = gather_194_cast_uint16_to_int32)[name = string("end_step_35")];
+            tensor<int32, [1]> expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = end_step_35)[name = string("expand_dims_259")];
+            tensor<int32, [1]> concat_356_values0_0 = const()[name = string("concat_356_values0_0"), val = tensor<int32, [1]>([16])];
+            int32 concat_356_axis_0 = const()[name = string("concat_356_axis_0"), val = int32(0)];
+            bool concat_356_interleave_0 = const()[name = string("concat_356_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_356 = concat(axis = concat_356_axis_0, interleave = concat_356_interleave_0, values = (concat_356_values0_0, expand_dims_256, expand_dims_1, expand_dims_258))[name = string("concat_356")];
+            tensor<int32, [1]> concat_357_values0_0 = const()[name = string("concat_357_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_357_values1_0 = const()[name = string("concat_357_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_357_values3_0 = const()[name = string("concat_357_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)];
+            bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (concat_357_values0_0, concat_357_values1_0, expand_dims_259, concat_357_values3_0))[name = string("concat_357")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = k_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = k_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_17_stride_0, update = linear_129_cast_fp16, x = coreml_update_state_78)[name = string("k_cache1_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_17_cast_fp16, input = k_cache1)[name = string("coreml_update_state_80_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_80 = read_state(input = k_cache1)[name = string("coreml_update_state_80")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_17_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = v_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = v_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_17_stride_0, update = linear_130_cast_fp16, x = coreml_update_state_79)[name = string("v_cache1_internal_tensor_assign_17_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_17_cast_fp16, input = v_cache1)[name = string("coreml_update_state_81_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_81 = read_state(input = v_cache1)[name = string("coreml_update_state_81")];
+            int32 concat_362_values0_0 = const()[name = string("concat_362_values0_0"), val = int32(1)];
+            int32 concat_362_values2_0 = const()[name = string("concat_362_values2_0"), val = int32(1024)];
+            int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)];
+            bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (concat_362_values0_0, end_step_35, concat_362_values2_0))[name = string("concat_362")];
+            tensor<int32, [3]> var_3580_begin_0 = const()[name = string("op_3580_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3580_end_mask_0 = const()[name = string("op_3580_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_3580_cast_fp16 = slice_by_index(begin = var_3580_begin_0, end = concat_362, end_mask = var_3580_end_mask_0, x = k_cache_65_cast_fp16)[name = string("op_3580_cast_fp16")];
+            tensor<int32, [3]> var_3583_begin_0 = const()[name = string("op_3583_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3583_end_mask_0 = const()[name = string("op_3583_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_3583_cast_fp16 = slice_by_index(begin = var_3583_begin_0, end = concat_362, end_mask = var_3583_end_mask_0, x = v_cache_65_cast_fp16)[name = string("op_3583_cast_fp16")];
+            tensor<int32, [4]> concat_364x = const()[name = string("concat_364x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3593_cast_fp16 = reshape(shape = concat_364x, x = linear_128_cast_fp16)[name = string("op_3593_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_131_cast_fp16 = mul(x = var_3593_cast_fp16, y = const_184_to_fp16)[name = string("q_131_cast_fp16")];
+            tensor<int32, [4]> concat_365x = const()[name = string("concat_365x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3600_cast_fp16 = reshape(shape = concat_365x, x = var_3580_cast_fp16)[name = string("op_3600_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_165_cast_fp16 = mul(x = var_3600_cast_fp16, y = const_185_to_fp16)[name = string("k_165_cast_fp16")];
+            tensor<int32, [4]> concat_366x = const()[name = string("concat_366x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3607_cast_fp16 = reshape(shape = concat_366x, x = var_3583_cast_fp16)[name = string("op_3607_cast_fp16")];
+            tensor<int32, [4]> var_3608 = const()[name = string("op_3608"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_97_transpose_x_0 = const()[name = string("qk_97_transpose_x_0"), val = bool(false)];
+            bool qk_97_transpose_y_0 = const()[name = string("qk_97_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_257_perm_0 = const()[name = string("transpose_257_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_258_perm_0 = const()[name = string("transpose_258_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_258 = transpose(perm = transpose_258_perm_0, x = k_165_cast_fp16)[name = string("transpose_350")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_257 = transpose(perm = transpose_257_perm_0, x = q_131_cast_fp16)[name = string("transpose_351")];
+            tensor<fp16, [1, 16, ?, ?]> qk_97_cast_fp16 = matmul(transpose_x = qk_97_transpose_x_0, transpose_y = qk_97_transpose_y_0, x = transpose_257, y = transpose_258)[name = string("qk_97_cast_fp16")];
+            int32 concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = int32(448)];
+            int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)];
+            bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (gather_194_cast_uint16_to_int32, concat_367_values1_0))[name = string("concat_367")];
+            tensor<int32, [2]> var_3611_begin_0 = const()[name = string("op_3611_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3611_end_mask_0 = const()[name = string("op_3611_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3611_cast_fp16 = slice_by_index(begin = var_3611_begin_0, end = concat_367, end_mask = var_3611_end_mask_0, x = mask_to_fp16)[name = string("op_3611_cast_fp16")];
+            int32 concat_368_values0_0 = const()[name = string("concat_368_values0_0"), val = int32(0)];
+            int32 concat_368_axis_0 = const()[name = string("concat_368_axis_0"), val = int32(0)];
+            bool concat_368_interleave_0 = const()[name = string("concat_368_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_368 = concat(axis = concat_368_axis_0, interleave = concat_368_interleave_0, values = (concat_368_values0_0, gather_194_cast_uint16_to_int32))[name = string("concat_368")];
+            tensor<int32, [2]> var_3612_begin_0 = const()[name = string("op_3612_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3612_end_mask_0 = const()[name = string("op_3612_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3612_cast_fp16 = slice_by_index(begin = var_3612_begin_0, end = concat_368, end_mask = var_3612_end_mask_0, x = var_3611_cast_fp16)[name = string("op_3612_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_99_cast_fp16 = add(x = qk_97_cast_fp16, y = var_3612_cast_fp16)[name = string("qk_99_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_3615_cast_fp16 = softmax(axis = var_3524, x = qk_99_cast_fp16)[name = string("op_3615_cast_fp16")];
+            bool var_3617_transpose_x_0 = const()[name = string("op_3617_transpose_x_0"), val = bool(false)];
+            bool var_3617_transpose_y_0 = const()[name = string("op_3617_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_165_cast_fp16 = transpose(perm = var_3608, x = var_3607_cast_fp16)[name = string("transpose_352")];
+            tensor<fp16, [1, 16, ?, 64]> var_3617_cast_fp16 = matmul(transpose_x = var_3617_transpose_x_0, transpose_y = var_3617_transpose_y_0, x = var_3615_cast_fp16, y = v_165_cast_fp16)[name = string("op_3617_cast_fp16")];
+            tensor<int32, [4]> var_3618 = const()[name = string("op_3618"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_369x = const()[name = string("concat_369x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_3619_cast_fp16 = transpose(perm = var_3618, x = var_3617_cast_fp16)[name = string("transpose_349")];
+            tensor<fp16, [1, ?, 1024]> x_295_cast_fp16 = reshape(shape = concat_369x, x = var_3619_cast_fp16)[name = string("x_295_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3623_to_fp16 = const()[name = string("op_3623_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(587220800)))];
+            tensor<fp16, [1024]> var_3624_to_fp16 = const()[name = string("op_3624_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589318016)))];
+            tensor<fp16, [1, ?, 1024]> linear_131_cast_fp16 = linear(bias = var_3624_to_fp16, weight = var_3623_to_fp16, x = x_295_cast_fp16)[name = string("linear_131_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_297_cast_fp16 = add(x = x_291_cast_fp16, y = linear_131_cast_fp16)[name = string("x_297_cast_fp16")];
+            tensor<int32, [1]> var_3631_axes_0 = const()[name = string("op_3631_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_16_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589320128)))];
+            tensor<fp16, [1024]> blocks_16_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589322240)))];
+            tensor<fp16, [1, ?, 1024]> var_3631_cast_fp16 = layer_norm(axes = var_3631_axes_0, beta = blocks_16_cross_attn_ln_bias_to_fp16, epsilon = var_3530_to_fp16, gamma = blocks_16_cross_attn_ln_weight_to_fp16, x = x_297_cast_fp16)[name = string("op_3631_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3640_to_fp16 = const()[name = string("op_3640_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589324352)))];
+            tensor<fp16, [1024]> var_3641_to_fp16 = const()[name = string("op_3641_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591421568)))];
+            tensor<fp16, [1, ?, 1024]> linear_132_cast_fp16 = linear(bias = var_3641_to_fp16, weight = var_3640_to_fp16, x = var_3631_cast_fp16)[name = string("linear_132_cast_fp16")];
+            tensor<int32, [3]> concat_370 = const()[name = string("concat_370"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_371 = const()[name = string("concat_371"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_167_internal_tensor_assign_1_stride_0 = const()[name = string("k_167_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_370, begin_mask = k_167_internal_tensor_assign_1_begin_mask_0, end = concat_371, end_mask = k_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_167_internal_tensor_assign_1_squeeze_mask_0, stride = k_167_internal_tensor_assign_1_stride_0, update = k_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("k_167_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_372 = const()[name = string("concat_372"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_373 = const()[name = string("concat_373"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_167_internal_tensor_assign_1_stride_0 = const()[name = string("v_167_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_372, begin_mask = v_167_internal_tensor_assign_1_begin_mask_0, end = concat_373, end_mask = v_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_167_internal_tensor_assign_1_squeeze_mask_0, stride = v_167_internal_tensor_assign_1_stride_0, update = v_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("v_167_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_374x = const()[name = string("concat_374x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3661_cast_fp16 = reshape(shape = concat_374x, x = linear_132_cast_fp16)[name = string("op_3661_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_135_cast_fp16 = mul(x = var_3661_cast_fp16, y = const_186_to_fp16)[name = string("q_135_cast_fp16")];
+            tensor<int32, [4]> var_3667 = const()[name = string("op_3667"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3668_cast_fp16 = reshape(shape = var_3667, x = k_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3668_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_169_cast_fp16 = mul(x = var_3668_cast_fp16, y = const_187_to_fp16)[name = string("k_169_cast_fp16")];
+            tensor<int32, [4]> var_3674 = const()[name = string("op_3674"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3675_cast_fp16 = reshape(shape = var_3674, x = v_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3675_cast_fp16")];
+            tensor<int32, [4]> var_3676 = const()[name = string("op_3676"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_101_transpose_x_0 = const()[name = string("qk_101_transpose_x_0"), val = bool(false)];
+            bool qk_101_transpose_y_0 = const()[name = string("qk_101_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_259_perm_0 = const()[name = string("transpose_259_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_260_perm_0 = const()[name = string("transpose_260_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_260 = transpose(perm = transpose_260_perm_0, x = k_169_cast_fp16)[name = string("transpose_346")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_259 = transpose(perm = transpose_259_perm_0, x = q_135_cast_fp16)[name = string("transpose_347")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_101_cast_fp16 = matmul(transpose_x = qk_101_transpose_x_0, transpose_y = qk_101_transpose_y_0, x = transpose_259, y = transpose_260)[name = string("qk_101_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_3680_cast_fp16 = softmax(axis = var_3524, x = qk_101_cast_fp16)[name = string("op_3680_cast_fp16")];
+            bool var_3682_transpose_x_0 = const()[name = string("op_3682_transpose_x_0"), val = bool(false)];
+            bool var_3682_transpose_y_0 = const()[name = string("op_3682_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_169_cast_fp16 = transpose(perm = var_3676, x = var_3675_cast_fp16)[name = string("transpose_348")];
+            tensor<fp16, [1, 16, ?, 64]> var_3682_cast_fp16 = matmul(transpose_x = var_3682_transpose_x_0, transpose_y = var_3682_transpose_y_0, x = var_3680_cast_fp16, y = v_169_cast_fp16)[name = string("op_3682_cast_fp16")];
+            tensor<int32, [4]> var_3683 = const()[name = string("op_3683"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_375x = const()[name = string("concat_375x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_3684_cast_fp16 = transpose(perm = var_3683, x = var_3682_cast_fp16)[name = string("transpose_345")];
+            tensor<fp16, [1, ?, 1024]> x_301_cast_fp16 = reshape(shape = concat_375x, x = var_3684_cast_fp16)[name = string("x_301_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3688_to_fp16 = const()[name = string("op_3688_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591423680)))];
+            tensor<fp16, [1024]> var_3689_to_fp16 = const()[name = string("op_3689_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593520896)))];
+            tensor<fp16, [1, ?, 1024]> linear_133_cast_fp16 = linear(bias = var_3689_to_fp16, weight = var_3688_to_fp16, x = x_301_cast_fp16)[name = string("linear_133_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_303_cast_fp16 = add(x = x_297_cast_fp16, y = linear_133_cast_fp16)[name = string("x_303_cast_fp16")];
+            tensor<int32, [1]> var_3696_axes_0 = const()[name = string("op_3696_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593523008)))];
+            tensor<fp16, [1024]> blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593525120)))];
+            tensor<fp16, [1, ?, 1024]> var_3696_cast_fp16 = layer_norm(axes = var_3696_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_3530_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_303_cast_fp16)[name = string("op_3696_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_3705_to_fp16 = const()[name = string("op_3705_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593527232)))];
+            tensor<fp16, [4096]> var_3706_to_fp16 = const()[name = string("op_3706_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601915904)))];
+            tensor<fp16, [1, ?, 4096]> linear_134_cast_fp16 = linear(bias = var_3706_to_fp16, weight = var_3705_to_fp16, x = var_3696_cast_fp16)[name = string("linear_134_cast_fp16")];
+            string x_307_mode_0 = const()[name = string("x_307_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_307_cast_fp16 = gelu(mode = x_307_mode_0, x = linear_134_cast_fp16)[name = string("x_307_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_3711_to_fp16 = const()[name = string("op_3711_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601924160)))];
+            tensor<fp16, [1024]> var_3712_to_fp16 = const()[name = string("op_3712_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610312832)))];
+            tensor<fp16, [1, ?, 1024]> linear_135_cast_fp16 = linear(bias = var_3712_to_fp16, weight = var_3711_to_fp16, x = x_307_cast_fp16)[name = string("linear_135_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_309_cast_fp16 = add(x = x_303_cast_fp16, y = linear_135_cast_fp16)[name = string("x_309_cast_fp16")];
+            tensor<int32, [4]> k_cache_69_begin_0 = const()[name = string("k_cache_69_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_69_end_0 = const()[name = string("k_cache_69_end_0"), val = tensor<int32, [4]>([18, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_69_end_mask_0 = const()[name = string("k_cache_69_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_69_squeeze_mask_0 = const()[name = string("k_cache_69_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_69_cast_fp16 = slice_by_index(begin = k_cache_69_begin_0, end = k_cache_69_end_0, end_mask = k_cache_69_end_mask_0, squeeze_mask = k_cache_69_squeeze_mask_0, x = coreml_update_state_80)[name = string("k_cache_69_cast_fp16")];
+            tensor<int32, [4]> v_cache_69_begin_0 = const()[name = string("v_cache_69_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_69_end_0 = const()[name = string("v_cache_69_end_0"), val = tensor<int32, [4]>([18, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_69_end_mask_0 = const()[name = string("v_cache_69_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_69_squeeze_mask_0 = const()[name = string("v_cache_69_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_69_cast_fp16 = slice_by_index(begin = v_cache_69_begin_0, end = v_cache_69_end_0, end_mask = v_cache_69_end_mask_0, squeeze_mask = v_cache_69_squeeze_mask_0, x = coreml_update_state_81)[name = string("v_cache_69_cast_fp16")];
+            tensor<int32, [4]> k_cache_71_begin_0 = const()[name = string("k_cache_71_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_71_end_0 = const()[name = string("k_cache_71_end_0"), val = tensor<int32, [4]>([18, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_71_end_mask_0 = const()[name = string("k_cache_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_71_squeeze_mask_0 = const()[name = string("k_cache_71_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_71_cast_fp16 = slice_by_index(begin = k_cache_71_begin_0, end = k_cache_71_end_0, end_mask = k_cache_71_end_mask_0, squeeze_mask = k_cache_71_squeeze_mask_0, x = read_state_2)[name = string("k_cache_71_cast_fp16")];
+            tensor<int32, [4]> v_cache_71_begin_0 = const()[name = string("v_cache_71_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_71_end_0 = const()[name = string("v_cache_71_end_0"), val = tensor<int32, [4]>([18, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_71_end_mask_0 = const()[name = string("v_cache_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_71_squeeze_mask_0 = const()[name = string("v_cache_71_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_71_cast_fp16 = slice_by_index(begin = v_cache_71_begin_0, end = v_cache_71_end_0, end_mask = v_cache_71_end_mask_0, squeeze_mask = v_cache_71_squeeze_mask_0, x = read_state_3)[name = string("v_cache_71_cast_fp16")];
+            int32 var_3735 = const()[name = string("op_3735"), val = int32(-1)];
+            tensor<int32, [1]> var_3753_axes_0 = const()[name = string("op_3753_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610314944)))];
+            tensor<fp16, [1024]> blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610317056)))];
+            fp16 var_3741_to_fp16 = const()[name = string("op_3741_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_3753_cast_fp16 = layer_norm(axes = var_3753_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_3741_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_309_cast_fp16)[name = string("op_3753_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3764_to_fp16 = const()[name = string("op_3764_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610319168)))];
+            tensor<fp16, [1024]> var_3765_to_fp16 = const()[name = string("op_3765_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612416384)))];
+            tensor<fp16, [1, ?, 1024]> linear_136_cast_fp16 = linear(bias = var_3765_to_fp16, weight = var_3764_to_fp16, x = var_3753_cast_fp16)[name = string("linear_136_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3768_to_fp16 = const()[name = string("op_3768_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612418496)))];
+            tensor<fp16, [1, ?, 1024]> linear_137_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3768_to_fp16, x = var_3753_cast_fp16)[name = string("linear_137_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3772_to_fp16 = const()[name = string("op_3772_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614515712)))];
+            tensor<fp16, [1024]> var_3773_to_fp16 = const()[name = string("op_3773_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(616612928)))];
+            tensor<fp16, [1, ?, 1024]> linear_138_cast_fp16 = linear(bias = var_3773_to_fp16, weight = var_3772_to_fp16, x = var_3753_cast_fp16)[name = string("linear_138_cast_fp16")];
+            tensor<int32, [3]> var_3775_shape_cast_fp16 = shape(x = linear_136_cast_fp16)[name = string("op_3775_shape_cast_fp16")];
+            int32 gather_206_axis_0 = const()[name = string("gather_206_axis_0"), val = int32(0)];
+            int32 gather_206_batch_dims_0 = const()[name = string("gather_206_batch_dims_0"), val = int32(0)];
+            bool gather_206_validate_indices_0 = const()[name = string("gather_206_validate_indices_0"), val = bool(false)];
+            string var_3775_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3775_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_206_to_uint16 = const()[name = string("select_206_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3775_shape_cast_fp16_to_uint16 = cast(dtype = var_3775_shape_cast_fp16_to_uint16_dtype_0, x = var_3775_shape_cast_fp16)[name = string("cast_260")];
+            uint16 gather_206_cast_uint16 = gather(axis = gather_206_axis_0, batch_dims = gather_206_batch_dims_0, indices = select_206_to_uint16, validate_indices = gather_206_validate_indices_0, x = var_3775_shape_cast_fp16_to_uint16)[name = string("gather_206_cast_uint16")];
+            string gather_206_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_206_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_206_cast_uint16_to_int32 = cast(dtype = gather_206_cast_uint16_to_int32_dtype_0, x = gather_206_cast_uint16)[name = string("cast_259")];
+            int32 end_step_37 = add(x = offset, y = gather_206_cast_uint16_to_int32)[name = string("end_step_37")];
+            tensor<int32, [1]> expand_dims_272 = const()[name = string("expand_dims_272"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_275_axes_0 = const()[name = string("expand_dims_275_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_275 = expand_dims(axes = expand_dims_275_axes_0, x = end_step_37)[name = string("expand_dims_275")];
+            tensor<int32, [1]> concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = tensor<int32, [1]>([17])];
+            int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)];
+            bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, expand_dims_272, expand_dims_1, expand_dims_274))[name = string("concat_378")];
+            tensor<int32, [1]> concat_379_values0_0 = const()[name = string("concat_379_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_379_values1_0 = const()[name = string("concat_379_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_379_values3_0 = const()[name = string("concat_379_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)];
+            bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (concat_379_values0_0, concat_379_values1_0, expand_dims_275, concat_379_values3_0))[name = string("concat_379")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = k_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = k_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_18_stride_0, update = linear_137_cast_fp16, x = coreml_update_state_80)[name = string("k_cache1_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_18_cast_fp16, input = k_cache1)[name = string("coreml_update_state_82_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_82 = read_state(input = k_cache1)[name = string("coreml_update_state_82")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_18_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = v_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = v_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_18_stride_0, update = linear_138_cast_fp16, x = coreml_update_state_81)[name = string("v_cache1_internal_tensor_assign_18_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_18_cast_fp16, input = v_cache1)[name = string("coreml_update_state_83_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_83 = read_state(input = v_cache1)[name = string("coreml_update_state_83")];
+            int32 concat_384_values0_0 = const()[name = string("concat_384_values0_0"), val = int32(1)];
+            int32 concat_384_values2_0 = const()[name = string("concat_384_values2_0"), val = int32(1024)];
+            int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)];
+            bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (concat_384_values0_0, end_step_37, concat_384_values2_0))[name = string("concat_384")];
+            tensor<int32, [3]> var_3791_begin_0 = const()[name = string("op_3791_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3791_end_mask_0 = const()[name = string("op_3791_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_3791_cast_fp16 = slice_by_index(begin = var_3791_begin_0, end = concat_384, end_mask = var_3791_end_mask_0, x = k_cache_69_cast_fp16)[name = string("op_3791_cast_fp16")];
+            tensor<int32, [3]> var_3794_begin_0 = const()[name = string("op_3794_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_3794_end_mask_0 = const()[name = string("op_3794_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_3794_cast_fp16 = slice_by_index(begin = var_3794_begin_0, end = concat_384, end_mask = var_3794_end_mask_0, x = v_cache_69_cast_fp16)[name = string("op_3794_cast_fp16")];
+            tensor<int32, [4]> concat_386x = const()[name = string("concat_386x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3804_cast_fp16 = reshape(shape = concat_386x, x = linear_136_cast_fp16)[name = string("op_3804_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_139_cast_fp16 = mul(x = var_3804_cast_fp16, y = const_188_to_fp16)[name = string("q_139_cast_fp16")];
+            tensor<int32, [4]> concat_387x = const()[name = string("concat_387x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3811_cast_fp16 = reshape(shape = concat_387x, x = var_3791_cast_fp16)[name = string("op_3811_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_175_cast_fp16 = mul(x = var_3811_cast_fp16, y = const_189_to_fp16)[name = string("k_175_cast_fp16")];
+            tensor<int32, [4]> concat_388x = const()[name = string("concat_388x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3818_cast_fp16 = reshape(shape = concat_388x, x = var_3794_cast_fp16)[name = string("op_3818_cast_fp16")];
+            tensor<int32, [4]> var_3819 = const()[name = string("op_3819"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_103_transpose_x_0 = const()[name = string("qk_103_transpose_x_0"), val = bool(false)];
+            bool qk_103_transpose_y_0 = const()[name = string("qk_103_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_261_perm_0 = const()[name = string("transpose_261_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_262_perm_0 = const()[name = string("transpose_262_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_262 = transpose(perm = transpose_262_perm_0, x = k_175_cast_fp16)[name = string("transpose_342")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_261 = transpose(perm = transpose_261_perm_0, x = q_139_cast_fp16)[name = string("transpose_343")];
+            tensor<fp16, [1, 16, ?, ?]> qk_103_cast_fp16 = matmul(transpose_x = qk_103_transpose_x_0, transpose_y = qk_103_transpose_y_0, x = transpose_261, y = transpose_262)[name = string("qk_103_cast_fp16")];
+            int32 concat_389_values1_0 = const()[name = string("concat_389_values1_0"), val = int32(448)];
+            int32 concat_389_axis_0 = const()[name = string("concat_389_axis_0"), val = int32(0)];
+            bool concat_389_interleave_0 = const()[name = string("concat_389_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_389 = concat(axis = concat_389_axis_0, interleave = concat_389_interleave_0, values = (gather_206_cast_uint16_to_int32, concat_389_values1_0))[name = string("concat_389")];
+            tensor<int32, [2]> var_3822_begin_0 = const()[name = string("op_3822_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3822_end_mask_0 = const()[name = string("op_3822_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_3822_cast_fp16 = slice_by_index(begin = var_3822_begin_0, end = concat_389, end_mask = var_3822_end_mask_0, x = mask_to_fp16)[name = string("op_3822_cast_fp16")];
+            int32 concat_390_values0_0 = const()[name = string("concat_390_values0_0"), val = int32(0)];
+            int32 concat_390_axis_0 = const()[name = string("concat_390_axis_0"), val = int32(0)];
+            bool concat_390_interleave_0 = const()[name = string("concat_390_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_390 = concat(axis = concat_390_axis_0, interleave = concat_390_interleave_0, values = (concat_390_values0_0, gather_206_cast_uint16_to_int32))[name = string("concat_390")];
+            tensor<int32, [2]> var_3823_begin_0 = const()[name = string("op_3823_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_3823_end_mask_0 = const()[name = string("op_3823_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_3823_cast_fp16 = slice_by_index(begin = var_3823_begin_0, end = concat_390, end_mask = var_3823_end_mask_0, x = var_3822_cast_fp16)[name = string("op_3823_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_105_cast_fp16 = add(x = qk_103_cast_fp16, y = var_3823_cast_fp16)[name = string("qk_105_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_3826_cast_fp16 = softmax(axis = var_3735, x = qk_105_cast_fp16)[name = string("op_3826_cast_fp16")];
+            bool var_3828_transpose_x_0 = const()[name = string("op_3828_transpose_x_0"), val = bool(false)];
+            bool var_3828_transpose_y_0 = const()[name = string("op_3828_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_175_cast_fp16 = transpose(perm = var_3819, x = var_3818_cast_fp16)[name = string("transpose_344")];
+            tensor<fp16, [1, 16, ?, 64]> var_3828_cast_fp16 = matmul(transpose_x = var_3828_transpose_x_0, transpose_y = var_3828_transpose_y_0, x = var_3826_cast_fp16, y = v_175_cast_fp16)[name = string("op_3828_cast_fp16")];
+            tensor<int32, [4]> var_3829 = const()[name = string("op_3829"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_391x = const()[name = string("concat_391x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_3830_cast_fp16 = transpose(perm = var_3829, x = var_3828_cast_fp16)[name = string("transpose_341")];
+            tensor<fp16, [1, ?, 1024]> x_313_cast_fp16 = reshape(shape = concat_391x, x = var_3830_cast_fp16)[name = string("x_313_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3834_to_fp16 = const()[name = string("op_3834_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(616615040)))];
+            tensor<fp16, [1024]> var_3835_to_fp16 = const()[name = string("op_3835_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618712256)))];
+            tensor<fp16, [1, ?, 1024]> linear_139_cast_fp16 = linear(bias = var_3835_to_fp16, weight = var_3834_to_fp16, x = x_313_cast_fp16)[name = string("linear_139_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_315_cast_fp16 = add(x = x_309_cast_fp16, y = linear_139_cast_fp16)[name = string("x_315_cast_fp16")];
+            tensor<int32, [1]> var_3842_axes_0 = const()[name = string("op_3842_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_17_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618714368)))];
+            tensor<fp16, [1024]> blocks_17_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618716480)))];
+            tensor<fp16, [1, ?, 1024]> var_3842_cast_fp16 = layer_norm(axes = var_3842_axes_0, beta = blocks_17_cross_attn_ln_bias_to_fp16, epsilon = var_3741_to_fp16, gamma = blocks_17_cross_attn_ln_weight_to_fp16, x = x_315_cast_fp16)[name = string("op_3842_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3851_to_fp16 = const()[name = string("op_3851_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618718592)))];
+            tensor<fp16, [1024]> var_3852_to_fp16 = const()[name = string("op_3852_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620815808)))];
+            tensor<fp16, [1, ?, 1024]> linear_140_cast_fp16 = linear(bias = var_3852_to_fp16, weight = var_3851_to_fp16, x = var_3842_cast_fp16)[name = string("linear_140_cast_fp16")];
+            tensor<int32, [3]> concat_392 = const()[name = string("concat_392"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_393 = const()[name = string("concat_393"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_177_internal_tensor_assign_1_stride_0 = const()[name = string("k_177_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_392, begin_mask = k_177_internal_tensor_assign_1_begin_mask_0, end = concat_393, end_mask = k_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_177_internal_tensor_assign_1_squeeze_mask_0, stride = k_177_internal_tensor_assign_1_stride_0, update = k_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("k_177_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_394 = const()[name = string("concat_394"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_395 = const()[name = string("concat_395"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_177_internal_tensor_assign_1_stride_0 = const()[name = string("v_177_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_394, begin_mask = v_177_internal_tensor_assign_1_begin_mask_0, end = concat_395, end_mask = v_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_177_internal_tensor_assign_1_squeeze_mask_0, stride = v_177_internal_tensor_assign_1_stride_0, update = v_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("v_177_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_396x = const()[name = string("concat_396x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_3872_cast_fp16 = reshape(shape = concat_396x, x = linear_140_cast_fp16)[name = string("op_3872_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_143_cast_fp16 = mul(x = var_3872_cast_fp16, y = const_190_to_fp16)[name = string("q_143_cast_fp16")];
+            tensor<int32, [4]> var_3878 = const()[name = string("op_3878"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3879_cast_fp16 = reshape(shape = var_3878, x = k_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3879_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_179_cast_fp16 = mul(x = var_3879_cast_fp16, y = const_191_to_fp16)[name = string("k_179_cast_fp16")];
+            tensor<int32, [4]> var_3885 = const()[name = string("op_3885"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_3886_cast_fp16 = reshape(shape = var_3885, x = v_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3886_cast_fp16")];
+            tensor<int32, [4]> var_3887 = const()[name = string("op_3887"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_107_transpose_x_0 = const()[name = string("qk_107_transpose_x_0"), val = bool(false)];
+            bool qk_107_transpose_y_0 = const()[name = string("qk_107_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_263_perm_0 = const()[name = string("transpose_263_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_264_perm_0 = const()[name = string("transpose_264_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_264 = transpose(perm = transpose_264_perm_0, x = k_179_cast_fp16)[name = string("transpose_338")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_263 = transpose(perm = transpose_263_perm_0, x = q_143_cast_fp16)[name = string("transpose_339")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_107_cast_fp16 = matmul(transpose_x = qk_107_transpose_x_0, transpose_y = qk_107_transpose_y_0, x = transpose_263, y = transpose_264)[name = string("qk_107_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_3891_cast_fp16 = softmax(axis = var_3735, x = qk_107_cast_fp16)[name = string("op_3891_cast_fp16")];
+            bool var_3893_transpose_x_0 = const()[name = string("op_3893_transpose_x_0"), val = bool(false)];
+            bool var_3893_transpose_y_0 = const()[name = string("op_3893_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_179_cast_fp16 = transpose(perm = var_3887, x = var_3886_cast_fp16)[name = string("transpose_340")];
+            tensor<fp16, [1, 16, ?, 64]> var_3893_cast_fp16 = matmul(transpose_x = var_3893_transpose_x_0, transpose_y = var_3893_transpose_y_0, x = var_3891_cast_fp16, y = v_179_cast_fp16)[name = string("op_3893_cast_fp16")];
+            tensor<int32, [4]> var_3894 = const()[name = string("op_3894"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_397x = const()[name = string("concat_397x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_3895_cast_fp16 = transpose(perm = var_3894, x = var_3893_cast_fp16)[name = string("transpose_337")];
+            tensor<fp16, [1, ?, 1024]> x_319_cast_fp16 = reshape(shape = concat_397x, x = var_3895_cast_fp16)[name = string("x_319_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3899_to_fp16 = const()[name = string("op_3899_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620817920)))];
+            tensor<fp16, [1024]> var_3900_to_fp16 = const()[name = string("op_3900_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622915136)))];
+            tensor<fp16, [1, ?, 1024]> linear_141_cast_fp16 = linear(bias = var_3900_to_fp16, weight = var_3899_to_fp16, x = x_319_cast_fp16)[name = string("linear_141_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_321_cast_fp16 = add(x = x_315_cast_fp16, y = linear_141_cast_fp16)[name = string("x_321_cast_fp16")];
+            tensor<int32, [1]> var_3907_axes_0 = const()[name = string("op_3907_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622917248)))];
+            tensor<fp16, [1024]> blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622919360)))];
+            tensor<fp16, [1, ?, 1024]> var_3907_cast_fp16 = layer_norm(axes = var_3907_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_3741_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_321_cast_fp16)[name = string("op_3907_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_3916_to_fp16 = const()[name = string("op_3916_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622921472)))];
+            tensor<fp16, [4096]> var_3917_to_fp16 = const()[name = string("op_3917_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631310144)))];
+            tensor<fp16, [1, ?, 4096]> linear_142_cast_fp16 = linear(bias = var_3917_to_fp16, weight = var_3916_to_fp16, x = var_3907_cast_fp16)[name = string("linear_142_cast_fp16")];
+            string x_325_mode_0 = const()[name = string("x_325_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_325_cast_fp16 = gelu(mode = x_325_mode_0, x = linear_142_cast_fp16)[name = string("x_325_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_3922_to_fp16 = const()[name = string("op_3922_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631318400)))];
+            tensor<fp16, [1024]> var_3923_to_fp16 = const()[name = string("op_3923_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639707072)))];
+            tensor<fp16, [1, ?, 1024]> linear_143_cast_fp16 = linear(bias = var_3923_to_fp16, weight = var_3922_to_fp16, x = x_325_cast_fp16)[name = string("linear_143_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_327_cast_fp16 = add(x = x_321_cast_fp16, y = linear_143_cast_fp16)[name = string("x_327_cast_fp16")];
+            tensor<int32, [4]> k_cache_73_begin_0 = const()[name = string("k_cache_73_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_73_end_0 = const()[name = string("k_cache_73_end_0"), val = tensor<int32, [4]>([19, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_73_end_mask_0 = const()[name = string("k_cache_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_73_squeeze_mask_0 = const()[name = string("k_cache_73_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_73_cast_fp16 = slice_by_index(begin = k_cache_73_begin_0, end = k_cache_73_end_0, end_mask = k_cache_73_end_mask_0, squeeze_mask = k_cache_73_squeeze_mask_0, x = coreml_update_state_82)[name = string("k_cache_73_cast_fp16")];
+            tensor<int32, [4]> v_cache_73_begin_0 = const()[name = string("v_cache_73_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_73_end_0 = const()[name = string("v_cache_73_end_0"), val = tensor<int32, [4]>([19, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_73_end_mask_0 = const()[name = string("v_cache_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_73_squeeze_mask_0 = const()[name = string("v_cache_73_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_73_cast_fp16 = slice_by_index(begin = v_cache_73_begin_0, end = v_cache_73_end_0, end_mask = v_cache_73_end_mask_0, squeeze_mask = v_cache_73_squeeze_mask_0, x = coreml_update_state_83)[name = string("v_cache_73_cast_fp16")];
+            tensor<int32, [4]> k_cache_75_begin_0 = const()[name = string("k_cache_75_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_75_end_0 = const()[name = string("k_cache_75_end_0"), val = tensor<int32, [4]>([19, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_75_end_mask_0 = const()[name = string("k_cache_75_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_75_squeeze_mask_0 = const()[name = string("k_cache_75_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_75_cast_fp16 = slice_by_index(begin = k_cache_75_begin_0, end = k_cache_75_end_0, end_mask = k_cache_75_end_mask_0, squeeze_mask = k_cache_75_squeeze_mask_0, x = read_state_2)[name = string("k_cache_75_cast_fp16")];
+            tensor<int32, [4]> v_cache_75_begin_0 = const()[name = string("v_cache_75_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_75_end_0 = const()[name = string("v_cache_75_end_0"), val = tensor<int32, [4]>([19, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_75_end_mask_0 = const()[name = string("v_cache_75_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_75_squeeze_mask_0 = const()[name = string("v_cache_75_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_75_cast_fp16 = slice_by_index(begin = v_cache_75_begin_0, end = v_cache_75_end_0, end_mask = v_cache_75_end_mask_0, squeeze_mask = v_cache_75_squeeze_mask_0, x = read_state_3)[name = string("v_cache_75_cast_fp16")];
+            int32 var_3946 = const()[name = string("op_3946"), val = int32(-1)];
+            tensor<int32, [1]> var_3964_axes_0 = const()[name = string("op_3964_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639709184)))];
+            tensor<fp16, [1024]> blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639711296)))];
+            fp16 var_3952_to_fp16 = const()[name = string("op_3952_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_3964_cast_fp16 = layer_norm(axes = var_3964_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_3952_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_327_cast_fp16)[name = string("op_3964_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3975_to_fp16 = const()[name = string("op_3975_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639713408)))];
+            tensor<fp16, [1024]> var_3976_to_fp16 = const()[name = string("op_3976_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641810624)))];
+            tensor<fp16, [1, ?, 1024]> linear_144_cast_fp16 = linear(bias = var_3976_to_fp16, weight = var_3975_to_fp16, x = var_3964_cast_fp16)[name = string("linear_144_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3979_to_fp16 = const()[name = string("op_3979_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641812736)))];
+            tensor<fp16, [1, ?, 1024]> linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3979_to_fp16, x = var_3964_cast_fp16)[name = string("linear_145_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_3983_to_fp16 = const()[name = string("op_3983_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643909952)))];
+            tensor<fp16, [1024]> var_3984_to_fp16 = const()[name = string("op_3984_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646007168)))];
+            tensor<fp16, [1, ?, 1024]> linear_146_cast_fp16 = linear(bias = var_3984_to_fp16, weight = var_3983_to_fp16, x = var_3964_cast_fp16)[name = string("linear_146_cast_fp16")];
+            tensor<int32, [3]> var_3986_shape_cast_fp16 = shape(x = linear_144_cast_fp16)[name = string("op_3986_shape_cast_fp16")];
+            int32 gather_218_axis_0 = const()[name = string("gather_218_axis_0"), val = int32(0)];
+            int32 gather_218_batch_dims_0 = const()[name = string("gather_218_batch_dims_0"), val = int32(0)];
+            bool gather_218_validate_indices_0 = const()[name = string("gather_218_validate_indices_0"), val = bool(false)];
+            string var_3986_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3986_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_218_to_uint16 = const()[name = string("select_218_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_3986_shape_cast_fp16_to_uint16 = cast(dtype = var_3986_shape_cast_fp16_to_uint16_dtype_0, x = var_3986_shape_cast_fp16)[name = string("cast_258")];
+            uint16 gather_218_cast_uint16 = gather(axis = gather_218_axis_0, batch_dims = gather_218_batch_dims_0, indices = select_218_to_uint16, validate_indices = gather_218_validate_indices_0, x = var_3986_shape_cast_fp16_to_uint16)[name = string("gather_218_cast_uint16")];
+            string gather_218_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_218_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_218_cast_uint16_to_int32 = cast(dtype = gather_218_cast_uint16_to_int32_dtype_0, x = gather_218_cast_uint16)[name = string("cast_257")];
+            int32 end_step_39 = add(x = offset, y = gather_218_cast_uint16_to_int32)[name = string("end_step_39")];
+            tensor<int32, [1]> expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_290 = const()[name = string("expand_dims_290"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_291_axes_0 = const()[name = string("expand_dims_291_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_291 = expand_dims(axes = expand_dims_291_axes_0, x = end_step_39)[name = string("expand_dims_291")];
+            tensor<int32, [1]> concat_400_values0_0 = const()[name = string("concat_400_values0_0"), val = tensor<int32, [1]>([18])];
+            int32 concat_400_axis_0 = const()[name = string("concat_400_axis_0"), val = int32(0)];
+            bool concat_400_interleave_0 = const()[name = string("concat_400_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_400 = concat(axis = concat_400_axis_0, interleave = concat_400_interleave_0, values = (concat_400_values0_0, expand_dims_288, expand_dims_1, expand_dims_290))[name = string("concat_400")];
+            tensor<int32, [1]> concat_401_values0_0 = const()[name = string("concat_401_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_401_values1_0 = const()[name = string("concat_401_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_401_values3_0 = const()[name = string("concat_401_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_401_axis_0 = const()[name = string("concat_401_axis_0"), val = int32(0)];
+            bool concat_401_interleave_0 = const()[name = string("concat_401_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_401 = concat(axis = concat_401_axis_0, interleave = concat_401_interleave_0, values = (concat_401_values0_0, concat_401_values1_0, expand_dims_291, concat_401_values3_0))[name = string("concat_401")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = k_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = k_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_19_stride_0, update = linear_145_cast_fp16, x = coreml_update_state_82)[name = string("k_cache1_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_19_cast_fp16, input = k_cache1)[name = string("coreml_update_state_84_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_84 = read_state(input = k_cache1)[name = string("coreml_update_state_84")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_19_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = v_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = v_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_19_stride_0, update = linear_146_cast_fp16, x = coreml_update_state_83)[name = string("v_cache1_internal_tensor_assign_19_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_19_cast_fp16, input = v_cache1)[name = string("coreml_update_state_85_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_85 = read_state(input = v_cache1)[name = string("coreml_update_state_85")];
+            int32 concat_406_values0_0 = const()[name = string("concat_406_values0_0"), val = int32(1)];
+            int32 concat_406_values2_0 = const()[name = string("concat_406_values2_0"), val = int32(1024)];
+            int32 concat_406_axis_0 = const()[name = string("concat_406_axis_0"), val = int32(0)];
+            bool concat_406_interleave_0 = const()[name = string("concat_406_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_406 = concat(axis = concat_406_axis_0, interleave = concat_406_interleave_0, values = (concat_406_values0_0, end_step_39, concat_406_values2_0))[name = string("concat_406")];
+            tensor<int32, [3]> var_4002_begin_0 = const()[name = string("op_4002_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4002_end_mask_0 = const()[name = string("op_4002_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4002_cast_fp16 = slice_by_index(begin = var_4002_begin_0, end = concat_406, end_mask = var_4002_end_mask_0, x = k_cache_73_cast_fp16)[name = string("op_4002_cast_fp16")];
+            tensor<int32, [3]> var_4005_begin_0 = const()[name = string("op_4005_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4005_end_mask_0 = const()[name = string("op_4005_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4005_cast_fp16 = slice_by_index(begin = var_4005_begin_0, end = concat_406, end_mask = var_4005_end_mask_0, x = v_cache_73_cast_fp16)[name = string("op_4005_cast_fp16")];
+            tensor<int32, [4]> concat_408x = const()[name = string("concat_408x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4015_cast_fp16 = reshape(shape = concat_408x, x = linear_144_cast_fp16)[name = string("op_4015_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_147_cast_fp16 = mul(x = var_4015_cast_fp16, y = const_192_to_fp16)[name = string("q_147_cast_fp16")];
+            tensor<int32, [4]> concat_409x = const()[name = string("concat_409x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4022_cast_fp16 = reshape(shape = concat_409x, x = var_4002_cast_fp16)[name = string("op_4022_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_185_cast_fp16 = mul(x = var_4022_cast_fp16, y = const_193_to_fp16)[name = string("k_185_cast_fp16")];
+            tensor<int32, [4]> concat_410x = const()[name = string("concat_410x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4029_cast_fp16 = reshape(shape = concat_410x, x = var_4005_cast_fp16)[name = string("op_4029_cast_fp16")];
+            tensor<int32, [4]> var_4030 = const()[name = string("op_4030"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_109_transpose_x_0 = const()[name = string("qk_109_transpose_x_0"), val = bool(false)];
+            bool qk_109_transpose_y_0 = const()[name = string("qk_109_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_265_perm_0 = const()[name = string("transpose_265_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_266_perm_0 = const()[name = string("transpose_266_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_266 = transpose(perm = transpose_266_perm_0, x = k_185_cast_fp16)[name = string("transpose_334")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_265 = transpose(perm = transpose_265_perm_0, x = q_147_cast_fp16)[name = string("transpose_335")];
+            tensor<fp16, [1, 16, ?, ?]> qk_109_cast_fp16 = matmul(transpose_x = qk_109_transpose_x_0, transpose_y = qk_109_transpose_y_0, x = transpose_265, y = transpose_266)[name = string("qk_109_cast_fp16")];
+            int32 concat_411_values1_0 = const()[name = string("concat_411_values1_0"), val = int32(448)];
+            int32 concat_411_axis_0 = const()[name = string("concat_411_axis_0"), val = int32(0)];
+            bool concat_411_interleave_0 = const()[name = string("concat_411_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_411 = concat(axis = concat_411_axis_0, interleave = concat_411_interleave_0, values = (gather_218_cast_uint16_to_int32, concat_411_values1_0))[name = string("concat_411")];
+            tensor<int32, [2]> var_4033_begin_0 = const()[name = string("op_4033_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4033_end_mask_0 = const()[name = string("op_4033_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4033_cast_fp16 = slice_by_index(begin = var_4033_begin_0, end = concat_411, end_mask = var_4033_end_mask_0, x = mask_to_fp16)[name = string("op_4033_cast_fp16")];
+            int32 concat_412_values0_0 = const()[name = string("concat_412_values0_0"), val = int32(0)];
+            int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)];
+            bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (concat_412_values0_0, gather_218_cast_uint16_to_int32))[name = string("concat_412")];
+            tensor<int32, [2]> var_4034_begin_0 = const()[name = string("op_4034_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4034_end_mask_0 = const()[name = string("op_4034_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4034_cast_fp16 = slice_by_index(begin = var_4034_begin_0, end = concat_412, end_mask = var_4034_end_mask_0, x = var_4033_cast_fp16)[name = string("op_4034_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_111_cast_fp16 = add(x = qk_109_cast_fp16, y = var_4034_cast_fp16)[name = string("qk_111_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_4037_cast_fp16 = softmax(axis = var_3946, x = qk_111_cast_fp16)[name = string("op_4037_cast_fp16")];
+            bool var_4039_transpose_x_0 = const()[name = string("op_4039_transpose_x_0"), val = bool(false)];
+            bool var_4039_transpose_y_0 = const()[name = string("op_4039_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_185_cast_fp16 = transpose(perm = var_4030, x = var_4029_cast_fp16)[name = string("transpose_336")];
+            tensor<fp16, [1, 16, ?, 64]> var_4039_cast_fp16 = matmul(transpose_x = var_4039_transpose_x_0, transpose_y = var_4039_transpose_y_0, x = var_4037_cast_fp16, y = v_185_cast_fp16)[name = string("op_4039_cast_fp16")];
+            tensor<int32, [4]> var_4040 = const()[name = string("op_4040"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_413x = const()[name = string("concat_413x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4041_cast_fp16 = transpose(perm = var_4040, x = var_4039_cast_fp16)[name = string("transpose_333")];
+            tensor<fp16, [1, ?, 1024]> x_331_cast_fp16 = reshape(shape = concat_413x, x = var_4041_cast_fp16)[name = string("x_331_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4045_to_fp16 = const()[name = string("op_4045_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646009280)))];
+            tensor<fp16, [1024]> var_4046_to_fp16 = const()[name = string("op_4046_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648106496)))];
+            tensor<fp16, [1, ?, 1024]> linear_147_cast_fp16 = linear(bias = var_4046_to_fp16, weight = var_4045_to_fp16, x = x_331_cast_fp16)[name = string("linear_147_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_333_cast_fp16 = add(x = x_327_cast_fp16, y = linear_147_cast_fp16)[name = string("x_333_cast_fp16")];
+            tensor<int32, [1]> var_4053_axes_0 = const()[name = string("op_4053_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_18_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648108608)))];
+            tensor<fp16, [1024]> blocks_18_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648110720)))];
+            tensor<fp16, [1, ?, 1024]> var_4053_cast_fp16 = layer_norm(axes = var_4053_axes_0, beta = blocks_18_cross_attn_ln_bias_to_fp16, epsilon = var_3952_to_fp16, gamma = blocks_18_cross_attn_ln_weight_to_fp16, x = x_333_cast_fp16)[name = string("op_4053_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4062_to_fp16 = const()[name = string("op_4062_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648112832)))];
+            tensor<fp16, [1024]> var_4063_to_fp16 = const()[name = string("op_4063_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650210048)))];
+            tensor<fp16, [1, ?, 1024]> linear_148_cast_fp16 = linear(bias = var_4063_to_fp16, weight = var_4062_to_fp16, x = var_4053_cast_fp16)[name = string("linear_148_cast_fp16")];
+            tensor<int32, [3]> concat_414 = const()[name = string("concat_414"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_415 = const()[name = string("concat_415"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_187_internal_tensor_assign_1_stride_0 = const()[name = string("k_187_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_414, begin_mask = k_187_internal_tensor_assign_1_begin_mask_0, end = concat_415, end_mask = k_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_187_internal_tensor_assign_1_squeeze_mask_0, stride = k_187_internal_tensor_assign_1_stride_0, update = k_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("k_187_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_416 = const()[name = string("concat_416"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_417 = const()[name = string("concat_417"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_187_internal_tensor_assign_1_stride_0 = const()[name = string("v_187_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_416, begin_mask = v_187_internal_tensor_assign_1_begin_mask_0, end = concat_417, end_mask = v_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_187_internal_tensor_assign_1_squeeze_mask_0, stride = v_187_internal_tensor_assign_1_stride_0, update = v_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("v_187_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_418x = const()[name = string("concat_418x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4083_cast_fp16 = reshape(shape = concat_418x, x = linear_148_cast_fp16)[name = string("op_4083_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_151_cast_fp16 = mul(x = var_4083_cast_fp16, y = const_194_to_fp16)[name = string("q_151_cast_fp16")];
+            tensor<int32, [4]> var_4089 = const()[name = string("op_4089"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4090_cast_fp16 = reshape(shape = var_4089, x = k_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4090_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_189_cast_fp16 = mul(x = var_4090_cast_fp16, y = const_195_to_fp16)[name = string("k_189_cast_fp16")];
+            tensor<int32, [4]> var_4096 = const()[name = string("op_4096"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4097_cast_fp16 = reshape(shape = var_4096, x = v_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4097_cast_fp16")];
+            tensor<int32, [4]> var_4098 = const()[name = string("op_4098"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_113_transpose_x_0 = const()[name = string("qk_113_transpose_x_0"), val = bool(false)];
+            bool qk_113_transpose_y_0 = const()[name = string("qk_113_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_267_perm_0 = const()[name = string("transpose_267_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_268_perm_0 = const()[name = string("transpose_268_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_268 = transpose(perm = transpose_268_perm_0, x = k_189_cast_fp16)[name = string("transpose_330")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_267 = transpose(perm = transpose_267_perm_0, x = q_151_cast_fp16)[name = string("transpose_331")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_113_cast_fp16 = matmul(transpose_x = qk_113_transpose_x_0, transpose_y = qk_113_transpose_y_0, x = transpose_267, y = transpose_268)[name = string("qk_113_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_4102_cast_fp16 = softmax(axis = var_3946, x = qk_113_cast_fp16)[name = string("op_4102_cast_fp16")];
+            bool var_4104_transpose_x_0 = const()[name = string("op_4104_transpose_x_0"), val = bool(false)];
+            bool var_4104_transpose_y_0 = const()[name = string("op_4104_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_189_cast_fp16 = transpose(perm = var_4098, x = var_4097_cast_fp16)[name = string("transpose_332")];
+            tensor<fp16, [1, 16, ?, 64]> var_4104_cast_fp16 = matmul(transpose_x = var_4104_transpose_x_0, transpose_y = var_4104_transpose_y_0, x = var_4102_cast_fp16, y = v_189_cast_fp16)[name = string("op_4104_cast_fp16")];
+            tensor<int32, [4]> var_4105 = const()[name = string("op_4105"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_419x = const()[name = string("concat_419x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4106_cast_fp16 = transpose(perm = var_4105, x = var_4104_cast_fp16)[name = string("transpose_329")];
+            tensor<fp16, [1, ?, 1024]> x_337_cast_fp16 = reshape(shape = concat_419x, x = var_4106_cast_fp16)[name = string("x_337_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4110_to_fp16 = const()[name = string("op_4110_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650212160)))];
+            tensor<fp16, [1024]> var_4111_to_fp16 = const()[name = string("op_4111_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652309376)))];
+            tensor<fp16, [1, ?, 1024]> linear_149_cast_fp16 = linear(bias = var_4111_to_fp16, weight = var_4110_to_fp16, x = x_337_cast_fp16)[name = string("linear_149_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_339_cast_fp16 = add(x = x_333_cast_fp16, y = linear_149_cast_fp16)[name = string("x_339_cast_fp16")];
+            tensor<int32, [1]> var_4118_axes_0 = const()[name = string("op_4118_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652311488)))];
+            tensor<fp16, [1024]> blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652313600)))];
+            tensor<fp16, [1, ?, 1024]> var_4118_cast_fp16 = layer_norm(axes = var_4118_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_3952_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_339_cast_fp16)[name = string("op_4118_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_4127_to_fp16 = const()[name = string("op_4127_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652315712)))];
+            tensor<fp16, [4096]> var_4128_to_fp16 = const()[name = string("op_4128_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660704384)))];
+            tensor<fp16, [1, ?, 4096]> linear_150_cast_fp16 = linear(bias = var_4128_to_fp16, weight = var_4127_to_fp16, x = var_4118_cast_fp16)[name = string("linear_150_cast_fp16")];
+            string x_343_mode_0 = const()[name = string("x_343_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_343_cast_fp16 = gelu(mode = x_343_mode_0, x = linear_150_cast_fp16)[name = string("x_343_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_4133_to_fp16 = const()[name = string("op_4133_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660712640)))];
+            tensor<fp16, [1024]> var_4134_to_fp16 = const()[name = string("op_4134_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669101312)))];
+            tensor<fp16, [1, ?, 1024]> linear_151_cast_fp16 = linear(bias = var_4134_to_fp16, weight = var_4133_to_fp16, x = x_343_cast_fp16)[name = string("linear_151_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_345_cast_fp16 = add(x = x_339_cast_fp16, y = linear_151_cast_fp16)[name = string("x_345_cast_fp16")];
+            tensor<int32, [4]> k_cache_77_begin_0 = const()[name = string("k_cache_77_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_77_end_0 = const()[name = string("k_cache_77_end_0"), val = tensor<int32, [4]>([20, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_77_end_mask_0 = const()[name = string("k_cache_77_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_77_squeeze_mask_0 = const()[name = string("k_cache_77_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_77_cast_fp16 = slice_by_index(begin = k_cache_77_begin_0, end = k_cache_77_end_0, end_mask = k_cache_77_end_mask_0, squeeze_mask = k_cache_77_squeeze_mask_0, x = coreml_update_state_84)[name = string("k_cache_77_cast_fp16")];
+            tensor<int32, [4]> v_cache_77_begin_0 = const()[name = string("v_cache_77_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_77_end_0 = const()[name = string("v_cache_77_end_0"), val = tensor<int32, [4]>([20, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_77_end_mask_0 = const()[name = string("v_cache_77_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_77_squeeze_mask_0 = const()[name = string("v_cache_77_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_77_cast_fp16 = slice_by_index(begin = v_cache_77_begin_0, end = v_cache_77_end_0, end_mask = v_cache_77_end_mask_0, squeeze_mask = v_cache_77_squeeze_mask_0, x = coreml_update_state_85)[name = string("v_cache_77_cast_fp16")];
+            tensor<int32, [4]> k_cache_79_begin_0 = const()[name = string("k_cache_79_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_79_end_0 = const()[name = string("k_cache_79_end_0"), val = tensor<int32, [4]>([20, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_79_end_mask_0 = const()[name = string("k_cache_79_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_79_squeeze_mask_0 = const()[name = string("k_cache_79_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_79_cast_fp16 = slice_by_index(begin = k_cache_79_begin_0, end = k_cache_79_end_0, end_mask = k_cache_79_end_mask_0, squeeze_mask = k_cache_79_squeeze_mask_0, x = read_state_2)[name = string("k_cache_79_cast_fp16")];
+            tensor<int32, [4]> v_cache_79_begin_0 = const()[name = string("v_cache_79_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_79_end_0 = const()[name = string("v_cache_79_end_0"), val = tensor<int32, [4]>([20, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_79_end_mask_0 = const()[name = string("v_cache_79_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_79_squeeze_mask_0 = const()[name = string("v_cache_79_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_79_cast_fp16 = slice_by_index(begin = v_cache_79_begin_0, end = v_cache_79_end_0, end_mask = v_cache_79_end_mask_0, squeeze_mask = v_cache_79_squeeze_mask_0, x = read_state_3)[name = string("v_cache_79_cast_fp16")];
+            int32 var_4157 = const()[name = string("op_4157"), val = int32(-1)];
+            tensor<int32, [1]> var_4175_axes_0 = const()[name = string("op_4175_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669103424)))];
+            tensor<fp16, [1024]> blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669105536)))];
+            fp16 var_4163_to_fp16 = const()[name = string("op_4163_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_4175_cast_fp16 = layer_norm(axes = var_4175_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_4163_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_345_cast_fp16)[name = string("op_4175_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4186_to_fp16 = const()[name = string("op_4186_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669107648)))];
+            tensor<fp16, [1024]> var_4187_to_fp16 = const()[name = string("op_4187_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671204864)))];
+            tensor<fp16, [1, ?, 1024]> linear_152_cast_fp16 = linear(bias = var_4187_to_fp16, weight = var_4186_to_fp16, x = var_4175_cast_fp16)[name = string("linear_152_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4190_to_fp16 = const()[name = string("op_4190_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671206976)))];
+            tensor<fp16, [1, ?, 1024]> linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4190_to_fp16, x = var_4175_cast_fp16)[name = string("linear_153_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4194_to_fp16 = const()[name = string("op_4194_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673304192)))];
+            tensor<fp16, [1024]> var_4195_to_fp16 = const()[name = string("op_4195_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(675401408)))];
+            tensor<fp16, [1, ?, 1024]> linear_154_cast_fp16 = linear(bias = var_4195_to_fp16, weight = var_4194_to_fp16, x = var_4175_cast_fp16)[name = string("linear_154_cast_fp16")];
+            tensor<int32, [3]> var_4197_shape_cast_fp16 = shape(x = linear_152_cast_fp16)[name = string("op_4197_shape_cast_fp16")];
+            int32 gather_230_axis_0 = const()[name = string("gather_230_axis_0"), val = int32(0)];
+            int32 gather_230_batch_dims_0 = const()[name = string("gather_230_batch_dims_0"), val = int32(0)];
+            bool gather_230_validate_indices_0 = const()[name = string("gather_230_validate_indices_0"), val = bool(false)];
+            string var_4197_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4197_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_230_to_uint16 = const()[name = string("select_230_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4197_shape_cast_fp16_to_uint16 = cast(dtype = var_4197_shape_cast_fp16_to_uint16_dtype_0, x = var_4197_shape_cast_fp16)[name = string("cast_256")];
+            uint16 gather_230_cast_uint16 = gather(axis = gather_230_axis_0, batch_dims = gather_230_batch_dims_0, indices = select_230_to_uint16, validate_indices = gather_230_validate_indices_0, x = var_4197_shape_cast_fp16_to_uint16)[name = string("gather_230_cast_uint16")];
+            string gather_230_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_230_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_230_cast_uint16_to_int32 = cast(dtype = gather_230_cast_uint16_to_int32_dtype_0, x = gather_230_cast_uint16)[name = string("cast_255")];
+            int32 end_step_41 = add(x = offset, y = gather_230_cast_uint16_to_int32)[name = string("end_step_41")];
+            tensor<int32, [1]> expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_307_axes_0 = const()[name = string("expand_dims_307_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_307 = expand_dims(axes = expand_dims_307_axes_0, x = end_step_41)[name = string("expand_dims_307")];
+            tensor<int32, [1]> concat_422_values0_0 = const()[name = string("concat_422_values0_0"), val = tensor<int32, [1]>([19])];
+            int32 concat_422_axis_0 = const()[name = string("concat_422_axis_0"), val = int32(0)];
+            bool concat_422_interleave_0 = const()[name = string("concat_422_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_422 = concat(axis = concat_422_axis_0, interleave = concat_422_interleave_0, values = (concat_422_values0_0, expand_dims_304, expand_dims_1, expand_dims_306))[name = string("concat_422")];
+            tensor<int32, [1]> concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_423_values1_0 = const()[name = string("concat_423_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_423_values3_0 = const()[name = string("concat_423_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)];
+            bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, concat_423_values1_0, expand_dims_307, concat_423_values3_0))[name = string("concat_423")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = k_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = k_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_20_stride_0, update = linear_153_cast_fp16, x = coreml_update_state_84)[name = string("k_cache1_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_20_cast_fp16, input = k_cache1)[name = string("coreml_update_state_86_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_86 = read_state(input = k_cache1)[name = string("coreml_update_state_86")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_20_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = v_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = v_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_20_stride_0, update = linear_154_cast_fp16, x = coreml_update_state_85)[name = string("v_cache1_internal_tensor_assign_20_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_20_cast_fp16, input = v_cache1)[name = string("coreml_update_state_87_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_87 = read_state(input = v_cache1)[name = string("coreml_update_state_87")];
+            int32 concat_428_values0_0 = const()[name = string("concat_428_values0_0"), val = int32(1)];
+            int32 concat_428_values2_0 = const()[name = string("concat_428_values2_0"), val = int32(1024)];
+            int32 concat_428_axis_0 = const()[name = string("concat_428_axis_0"), val = int32(0)];
+            bool concat_428_interleave_0 = const()[name = string("concat_428_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_428 = concat(axis = concat_428_axis_0, interleave = concat_428_interleave_0, values = (concat_428_values0_0, end_step_41, concat_428_values2_0))[name = string("concat_428")];
+            tensor<int32, [3]> var_4213_begin_0 = const()[name = string("op_4213_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4213_end_mask_0 = const()[name = string("op_4213_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4213_cast_fp16 = slice_by_index(begin = var_4213_begin_0, end = concat_428, end_mask = var_4213_end_mask_0, x = k_cache_77_cast_fp16)[name = string("op_4213_cast_fp16")];
+            tensor<int32, [3]> var_4216_begin_0 = const()[name = string("op_4216_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4216_end_mask_0 = const()[name = string("op_4216_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4216_cast_fp16 = slice_by_index(begin = var_4216_begin_0, end = concat_428, end_mask = var_4216_end_mask_0, x = v_cache_77_cast_fp16)[name = string("op_4216_cast_fp16")];
+            tensor<int32, [4]> concat_430x = const()[name = string("concat_430x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4226_cast_fp16 = reshape(shape = concat_430x, x = linear_152_cast_fp16)[name = string("op_4226_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_155_cast_fp16 = mul(x = var_4226_cast_fp16, y = const_196_to_fp16)[name = string("q_155_cast_fp16")];
+            tensor<int32, [4]> concat_431x = const()[name = string("concat_431x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4233_cast_fp16 = reshape(shape = concat_431x, x = var_4213_cast_fp16)[name = string("op_4233_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_195_cast_fp16 = mul(x = var_4233_cast_fp16, y = const_197_to_fp16)[name = string("k_195_cast_fp16")];
+            tensor<int32, [4]> concat_432x = const()[name = string("concat_432x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4240_cast_fp16 = reshape(shape = concat_432x, x = var_4216_cast_fp16)[name = string("op_4240_cast_fp16")];
+            tensor<int32, [4]> var_4241 = const()[name = string("op_4241"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_115_transpose_x_0 = const()[name = string("qk_115_transpose_x_0"), val = bool(false)];
+            bool qk_115_transpose_y_0 = const()[name = string("qk_115_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_269_perm_0 = const()[name = string("transpose_269_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_270_perm_0 = const()[name = string("transpose_270_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_270 = transpose(perm = transpose_270_perm_0, x = k_195_cast_fp16)[name = string("transpose_326")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_269 = transpose(perm = transpose_269_perm_0, x = q_155_cast_fp16)[name = string("transpose_327")];
+            tensor<fp16, [1, 16, ?, ?]> qk_115_cast_fp16 = matmul(transpose_x = qk_115_transpose_x_0, transpose_y = qk_115_transpose_y_0, x = transpose_269, y = transpose_270)[name = string("qk_115_cast_fp16")];
+            int32 concat_433_values1_0 = const()[name = string("concat_433_values1_0"), val = int32(448)];
+            int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)];
+            bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_230_cast_uint16_to_int32, concat_433_values1_0))[name = string("concat_433")];
+            tensor<int32, [2]> var_4244_begin_0 = const()[name = string("op_4244_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4244_end_mask_0 = const()[name = string("op_4244_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4244_cast_fp16 = slice_by_index(begin = var_4244_begin_0, end = concat_433, end_mask = var_4244_end_mask_0, x = mask_to_fp16)[name = string("op_4244_cast_fp16")];
+            int32 concat_434_values0_0 = const()[name = string("concat_434_values0_0"), val = int32(0)];
+            int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)];
+            bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (concat_434_values0_0, gather_230_cast_uint16_to_int32))[name = string("concat_434")];
+            tensor<int32, [2]> var_4245_begin_0 = const()[name = string("op_4245_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4245_end_mask_0 = const()[name = string("op_4245_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4245_cast_fp16 = slice_by_index(begin = var_4245_begin_0, end = concat_434, end_mask = var_4245_end_mask_0, x = var_4244_cast_fp16)[name = string("op_4245_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_117_cast_fp16 = add(x = qk_115_cast_fp16, y = var_4245_cast_fp16)[name = string("qk_117_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_4248_cast_fp16 = softmax(axis = var_4157, x = qk_117_cast_fp16)[name = string("op_4248_cast_fp16")];
+            bool var_4250_transpose_x_0 = const()[name = string("op_4250_transpose_x_0"), val = bool(false)];
+            bool var_4250_transpose_y_0 = const()[name = string("op_4250_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_195_cast_fp16 = transpose(perm = var_4241, x = var_4240_cast_fp16)[name = string("transpose_328")];
+            tensor<fp16, [1, 16, ?, 64]> var_4250_cast_fp16 = matmul(transpose_x = var_4250_transpose_x_0, transpose_y = var_4250_transpose_y_0, x = var_4248_cast_fp16, y = v_195_cast_fp16)[name = string("op_4250_cast_fp16")];
+            tensor<int32, [4]> var_4251 = const()[name = string("op_4251"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_435x = const()[name = string("concat_435x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4252_cast_fp16 = transpose(perm = var_4251, x = var_4250_cast_fp16)[name = string("transpose_325")];
+            tensor<fp16, [1, ?, 1024]> x_349_cast_fp16 = reshape(shape = concat_435x, x = var_4252_cast_fp16)[name = string("x_349_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4256_to_fp16 = const()[name = string("op_4256_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(675403520)))];
+            tensor<fp16, [1024]> var_4257_to_fp16 = const()[name = string("op_4257_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677500736)))];
+            tensor<fp16, [1, ?, 1024]> linear_155_cast_fp16 = linear(bias = var_4257_to_fp16, weight = var_4256_to_fp16, x = x_349_cast_fp16)[name = string("linear_155_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_351_cast_fp16 = add(x = x_345_cast_fp16, y = linear_155_cast_fp16)[name = string("x_351_cast_fp16")];
+            tensor<int32, [1]> var_4264_axes_0 = const()[name = string("op_4264_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_19_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677502848)))];
+            tensor<fp16, [1024]> blocks_19_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677504960)))];
+            tensor<fp16, [1, ?, 1024]> var_4264_cast_fp16 = layer_norm(axes = var_4264_axes_0, beta = blocks_19_cross_attn_ln_bias_to_fp16, epsilon = var_4163_to_fp16, gamma = blocks_19_cross_attn_ln_weight_to_fp16, x = x_351_cast_fp16)[name = string("op_4264_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4273_to_fp16 = const()[name = string("op_4273_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677507072)))];
+            tensor<fp16, [1024]> var_4274_to_fp16 = const()[name = string("op_4274_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679604288)))];
+            tensor<fp16, [1, ?, 1024]> linear_156_cast_fp16 = linear(bias = var_4274_to_fp16, weight = var_4273_to_fp16, x = var_4264_cast_fp16)[name = string("linear_156_cast_fp16")];
+            tensor<int32, [3]> concat_436 = const()[name = string("concat_436"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_437 = const()[name = string("concat_437"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_197_internal_tensor_assign_1_stride_0 = const()[name = string("k_197_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_436, begin_mask = k_197_internal_tensor_assign_1_begin_mask_0, end = concat_437, end_mask = k_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_197_internal_tensor_assign_1_squeeze_mask_0, stride = k_197_internal_tensor_assign_1_stride_0, update = k_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("k_197_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_438 = const()[name = string("concat_438"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_439 = const()[name = string("concat_439"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_197_internal_tensor_assign_1_stride_0 = const()[name = string("v_197_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_438, begin_mask = v_197_internal_tensor_assign_1_begin_mask_0, end = concat_439, end_mask = v_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_197_internal_tensor_assign_1_squeeze_mask_0, stride = v_197_internal_tensor_assign_1_stride_0, update = v_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("v_197_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_440x = const()[name = string("concat_440x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4294_cast_fp16 = reshape(shape = concat_440x, x = linear_156_cast_fp16)[name = string("op_4294_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_159_cast_fp16 = mul(x = var_4294_cast_fp16, y = const_198_to_fp16)[name = string("q_159_cast_fp16")];
+            tensor<int32, [4]> var_4300 = const()[name = string("op_4300"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4301_cast_fp16 = reshape(shape = var_4300, x = k_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4301_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_199_cast_fp16 = mul(x = var_4301_cast_fp16, y = const_199_to_fp16)[name = string("k_199_cast_fp16")];
+            tensor<int32, [4]> var_4307 = const()[name = string("op_4307"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4308_cast_fp16 = reshape(shape = var_4307, x = v_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4308_cast_fp16")];
+            tensor<int32, [4]> var_4309 = const()[name = string("op_4309"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_119_transpose_x_0 = const()[name = string("qk_119_transpose_x_0"), val = bool(false)];
+            bool qk_119_transpose_y_0 = const()[name = string("qk_119_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_271_perm_0 = const()[name = string("transpose_271_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_272_perm_0 = const()[name = string("transpose_272_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_272 = transpose(perm = transpose_272_perm_0, x = k_199_cast_fp16)[name = string("transpose_322")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_271 = transpose(perm = transpose_271_perm_0, x = q_159_cast_fp16)[name = string("transpose_323")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_119_cast_fp16 = matmul(transpose_x = qk_119_transpose_x_0, transpose_y = qk_119_transpose_y_0, x = transpose_271, y = transpose_272)[name = string("qk_119_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_4313_cast_fp16 = softmax(axis = var_4157, x = qk_119_cast_fp16)[name = string("op_4313_cast_fp16")];
+            bool var_4315_transpose_x_0 = const()[name = string("op_4315_transpose_x_0"), val = bool(false)];
+            bool var_4315_transpose_y_0 = const()[name = string("op_4315_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_199_cast_fp16 = transpose(perm = var_4309, x = var_4308_cast_fp16)[name = string("transpose_324")];
+            tensor<fp16, [1, 16, ?, 64]> var_4315_cast_fp16 = matmul(transpose_x = var_4315_transpose_x_0, transpose_y = var_4315_transpose_y_0, x = var_4313_cast_fp16, y = v_199_cast_fp16)[name = string("op_4315_cast_fp16")];
+            tensor<int32, [4]> var_4316 = const()[name = string("op_4316"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_441x = const()[name = string("concat_441x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4317_cast_fp16 = transpose(perm = var_4316, x = var_4315_cast_fp16)[name = string("transpose_321")];
+            tensor<fp16, [1, ?, 1024]> x_355_cast_fp16 = reshape(shape = concat_441x, x = var_4317_cast_fp16)[name = string("x_355_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4321_to_fp16 = const()[name = string("op_4321_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679606400)))];
+            tensor<fp16, [1024]> var_4322_to_fp16 = const()[name = string("op_4322_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681703616)))];
+            tensor<fp16, [1, ?, 1024]> linear_157_cast_fp16 = linear(bias = var_4322_to_fp16, weight = var_4321_to_fp16, x = x_355_cast_fp16)[name = string("linear_157_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_357_cast_fp16 = add(x = x_351_cast_fp16, y = linear_157_cast_fp16)[name = string("x_357_cast_fp16")];
+            tensor<int32, [1]> var_4329_axes_0 = const()[name = string("op_4329_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681705728)))];
+            tensor<fp16, [1024]> blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681707840)))];
+            tensor<fp16, [1, ?, 1024]> var_4329_cast_fp16 = layer_norm(axes = var_4329_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_4163_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_357_cast_fp16)[name = string("op_4329_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_4338_to_fp16 = const()[name = string("op_4338_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681709952)))];
+            tensor<fp16, [4096]> var_4339_to_fp16 = const()[name = string("op_4339_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(690098624)))];
+            tensor<fp16, [1, ?, 4096]> linear_158_cast_fp16 = linear(bias = var_4339_to_fp16, weight = var_4338_to_fp16, x = var_4329_cast_fp16)[name = string("linear_158_cast_fp16")];
+            string x_361_mode_0 = const()[name = string("x_361_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_361_cast_fp16 = gelu(mode = x_361_mode_0, x = linear_158_cast_fp16)[name = string("x_361_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_4344_to_fp16 = const()[name = string("op_4344_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(690106880)))];
+            tensor<fp16, [1024]> var_4345_to_fp16 = const()[name = string("op_4345_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698495552)))];
+            tensor<fp16, [1, ?, 1024]> linear_159_cast_fp16 = linear(bias = var_4345_to_fp16, weight = var_4344_to_fp16, x = x_361_cast_fp16)[name = string("linear_159_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_159_cast_fp16)[name = string("x_363_cast_fp16")];
+            tensor<int32, [4]> k_cache_81_begin_0 = const()[name = string("k_cache_81_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_81_end_0 = const()[name = string("k_cache_81_end_0"), val = tensor<int32, [4]>([21, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_81_end_mask_0 = const()[name = string("k_cache_81_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_81_squeeze_mask_0 = const()[name = string("k_cache_81_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_81_cast_fp16 = slice_by_index(begin = k_cache_81_begin_0, end = k_cache_81_end_0, end_mask = k_cache_81_end_mask_0, squeeze_mask = k_cache_81_squeeze_mask_0, x = coreml_update_state_86)[name = string("k_cache_81_cast_fp16")];
+            tensor<int32, [4]> v_cache_81_begin_0 = const()[name = string("v_cache_81_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_81_end_0 = const()[name = string("v_cache_81_end_0"), val = tensor<int32, [4]>([21, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_81_end_mask_0 = const()[name = string("v_cache_81_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_81_squeeze_mask_0 = const()[name = string("v_cache_81_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_81_cast_fp16 = slice_by_index(begin = v_cache_81_begin_0, end = v_cache_81_end_0, end_mask = v_cache_81_end_mask_0, squeeze_mask = v_cache_81_squeeze_mask_0, x = coreml_update_state_87)[name = string("v_cache_81_cast_fp16")];
+            tensor<int32, [4]> k_cache_83_begin_0 = const()[name = string("k_cache_83_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_83_end_0 = const()[name = string("k_cache_83_end_0"), val = tensor<int32, [4]>([21, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_83_end_mask_0 = const()[name = string("k_cache_83_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_83_squeeze_mask_0 = const()[name = string("k_cache_83_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_83_cast_fp16 = slice_by_index(begin = k_cache_83_begin_0, end = k_cache_83_end_0, end_mask = k_cache_83_end_mask_0, squeeze_mask = k_cache_83_squeeze_mask_0, x = read_state_2)[name = string("k_cache_83_cast_fp16")];
+            tensor<int32, [4]> v_cache_83_begin_0 = const()[name = string("v_cache_83_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_83_end_0 = const()[name = string("v_cache_83_end_0"), val = tensor<int32, [4]>([21, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_83_end_mask_0 = const()[name = string("v_cache_83_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_83_squeeze_mask_0 = const()[name = string("v_cache_83_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_83_cast_fp16 = slice_by_index(begin = v_cache_83_begin_0, end = v_cache_83_end_0, end_mask = v_cache_83_end_mask_0, squeeze_mask = v_cache_83_squeeze_mask_0, x = read_state_3)[name = string("v_cache_83_cast_fp16")];
+            int32 var_4368 = const()[name = string("op_4368"), val = int32(-1)];
+            tensor<int32, [1]> var_4386_axes_0 = const()[name = string("op_4386_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698497664)))];
+            tensor<fp16, [1024]> blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698499776)))];
+            fp16 var_4374_to_fp16 = const()[name = string("op_4374_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_4386_cast_fp16 = layer_norm(axes = var_4386_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_4374_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_363_cast_fp16)[name = string("op_4386_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4397_to_fp16 = const()[name = string("op_4397_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698501888)))];
+            tensor<fp16, [1024]> var_4398_to_fp16 = const()[name = string("op_4398_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700599104)))];
+            tensor<fp16, [1, ?, 1024]> linear_160_cast_fp16 = linear(bias = var_4398_to_fp16, weight = var_4397_to_fp16, x = var_4386_cast_fp16)[name = string("linear_160_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4401_to_fp16 = const()[name = string("op_4401_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700601216)))];
+            tensor<fp16, [1, ?, 1024]> linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4401_to_fp16, x = var_4386_cast_fp16)[name = string("linear_161_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4405_to_fp16 = const()[name = string("op_4405_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702698432)))];
+            tensor<fp16, [1024]> var_4406_to_fp16 = const()[name = string("op_4406_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704795648)))];
+            tensor<fp16, [1, ?, 1024]> linear_162_cast_fp16 = linear(bias = var_4406_to_fp16, weight = var_4405_to_fp16, x = var_4386_cast_fp16)[name = string("linear_162_cast_fp16")];
+            tensor<int32, [3]> var_4408_shape_cast_fp16 = shape(x = linear_160_cast_fp16)[name = string("op_4408_shape_cast_fp16")];
+            int32 gather_242_axis_0 = const()[name = string("gather_242_axis_0"), val = int32(0)];
+            int32 gather_242_batch_dims_0 = const()[name = string("gather_242_batch_dims_0"), val = int32(0)];
+            bool gather_242_validate_indices_0 = const()[name = string("gather_242_validate_indices_0"), val = bool(false)];
+            string var_4408_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4408_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_242_to_uint16 = const()[name = string("select_242_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4408_shape_cast_fp16_to_uint16 = cast(dtype = var_4408_shape_cast_fp16_to_uint16_dtype_0, x = var_4408_shape_cast_fp16)[name = string("cast_254")];
+            uint16 gather_242_cast_uint16 = gather(axis = gather_242_axis_0, batch_dims = gather_242_batch_dims_0, indices = select_242_to_uint16, validate_indices = gather_242_validate_indices_0, x = var_4408_shape_cast_fp16_to_uint16)[name = string("gather_242_cast_uint16")];
+            string gather_242_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_242_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_242_cast_uint16_to_int32 = cast(dtype = gather_242_cast_uint16_to_int32_dtype_0, x = gather_242_cast_uint16)[name = string("cast_253")];
+            int32 end_step_43 = add(x = offset, y = gather_242_cast_uint16_to_int32)[name = string("end_step_43")];
+            tensor<int32, [1]> expand_dims_320 = const()[name = string("expand_dims_320"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_323_axes_0 = const()[name = string("expand_dims_323_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_323 = expand_dims(axes = expand_dims_323_axes_0, x = end_step_43)[name = string("expand_dims_323")];
+            tensor<int32, [1]> concat_444_values0_0 = const()[name = string("concat_444_values0_0"), val = tensor<int32, [1]>([20])];
+            int32 concat_444_axis_0 = const()[name = string("concat_444_axis_0"), val = int32(0)];
+            bool concat_444_interleave_0 = const()[name = string("concat_444_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_444 = concat(axis = concat_444_axis_0, interleave = concat_444_interleave_0, values = (concat_444_values0_0, expand_dims_320, expand_dims_1, expand_dims_322))[name = string("concat_444")];
+            tensor<int32, [1]> concat_445_values0_0 = const()[name = string("concat_445_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_445_values1_0 = const()[name = string("concat_445_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_445_values3_0 = const()[name = string("concat_445_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_445_axis_0 = const()[name = string("concat_445_axis_0"), val = int32(0)];
+            bool concat_445_interleave_0 = const()[name = string("concat_445_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_445 = concat(axis = concat_445_axis_0, interleave = concat_445_interleave_0, values = (concat_445_values0_0, concat_445_values1_0, expand_dims_323, concat_445_values3_0))[name = string("concat_445")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = k_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = k_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_21_stride_0, update = linear_161_cast_fp16, x = coreml_update_state_86)[name = string("k_cache1_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_21_cast_fp16, input = k_cache1)[name = string("coreml_update_state_88_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_88 = read_state(input = k_cache1)[name = string("coreml_update_state_88")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_21_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = v_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = v_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_21_stride_0, update = linear_162_cast_fp16, x = coreml_update_state_87)[name = string("v_cache1_internal_tensor_assign_21_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_21_cast_fp16, input = v_cache1)[name = string("coreml_update_state_89_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_89 = read_state(input = v_cache1)[name = string("coreml_update_state_89")];
+            int32 concat_450_values0_0 = const()[name = string("concat_450_values0_0"), val = int32(1)];
+            int32 concat_450_values2_0 = const()[name = string("concat_450_values2_0"), val = int32(1024)];
+            int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)];
+            bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (concat_450_values0_0, end_step_43, concat_450_values2_0))[name = string("concat_450")];
+            tensor<int32, [3]> var_4424_begin_0 = const()[name = string("op_4424_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4424_end_mask_0 = const()[name = string("op_4424_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4424_cast_fp16 = slice_by_index(begin = var_4424_begin_0, end = concat_450, end_mask = var_4424_end_mask_0, x = k_cache_81_cast_fp16)[name = string("op_4424_cast_fp16")];
+            tensor<int32, [3]> var_4427_begin_0 = const()[name = string("op_4427_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4427_end_mask_0 = const()[name = string("op_4427_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4427_cast_fp16 = slice_by_index(begin = var_4427_begin_0, end = concat_450, end_mask = var_4427_end_mask_0, x = v_cache_81_cast_fp16)[name = string("op_4427_cast_fp16")];
+            tensor<int32, [4]> concat_452x = const()[name = string("concat_452x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4437_cast_fp16 = reshape(shape = concat_452x, x = linear_160_cast_fp16)[name = string("op_4437_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_163_cast_fp16 = mul(x = var_4437_cast_fp16, y = const_200_to_fp16)[name = string("q_163_cast_fp16")];
+            tensor<int32, [4]> concat_453x = const()[name = string("concat_453x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4444_cast_fp16 = reshape(shape = concat_453x, x = var_4424_cast_fp16)[name = string("op_4444_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_205_cast_fp16 = mul(x = var_4444_cast_fp16, y = const_201_to_fp16)[name = string("k_205_cast_fp16")];
+            tensor<int32, [4]> concat_454x = const()[name = string("concat_454x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4451_cast_fp16 = reshape(shape = concat_454x, x = var_4427_cast_fp16)[name = string("op_4451_cast_fp16")];
+            tensor<int32, [4]> var_4452 = const()[name = string("op_4452"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_121_transpose_x_0 = const()[name = string("qk_121_transpose_x_0"), val = bool(false)];
+            bool qk_121_transpose_y_0 = const()[name = string("qk_121_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_273_perm_0 = const()[name = string("transpose_273_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_274_perm_0 = const()[name = string("transpose_274_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_274 = transpose(perm = transpose_274_perm_0, x = k_205_cast_fp16)[name = string("transpose_318")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_273 = transpose(perm = transpose_273_perm_0, x = q_163_cast_fp16)[name = string("transpose_319")];
+            tensor<fp16, [1, 16, ?, ?]> qk_121_cast_fp16 = matmul(transpose_x = qk_121_transpose_x_0, transpose_y = qk_121_transpose_y_0, x = transpose_273, y = transpose_274)[name = string("qk_121_cast_fp16")];
+            int32 concat_455_values1_0 = const()[name = string("concat_455_values1_0"), val = int32(448)];
+            int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)];
+            bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_242_cast_uint16_to_int32, concat_455_values1_0))[name = string("concat_455")];
+            tensor<int32, [2]> var_4455_begin_0 = const()[name = string("op_4455_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4455_end_mask_0 = const()[name = string("op_4455_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4455_cast_fp16 = slice_by_index(begin = var_4455_begin_0, end = concat_455, end_mask = var_4455_end_mask_0, x = mask_to_fp16)[name = string("op_4455_cast_fp16")];
+            int32 concat_456_values0_0 = const()[name = string("concat_456_values0_0"), val = int32(0)];
+            int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)];
+            bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (concat_456_values0_0, gather_242_cast_uint16_to_int32))[name = string("concat_456")];
+            tensor<int32, [2]> var_4456_begin_0 = const()[name = string("op_4456_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4456_end_mask_0 = const()[name = string("op_4456_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4456_cast_fp16 = slice_by_index(begin = var_4456_begin_0, end = concat_456, end_mask = var_4456_end_mask_0, x = var_4455_cast_fp16)[name = string("op_4456_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_123_cast_fp16 = add(x = qk_121_cast_fp16, y = var_4456_cast_fp16)[name = string("qk_123_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_4459_cast_fp16 = softmax(axis = var_4368, x = qk_123_cast_fp16)[name = string("op_4459_cast_fp16")];
+            bool var_4461_transpose_x_0 = const()[name = string("op_4461_transpose_x_0"), val = bool(false)];
+            bool var_4461_transpose_y_0 = const()[name = string("op_4461_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_205_cast_fp16 = transpose(perm = var_4452, x = var_4451_cast_fp16)[name = string("transpose_320")];
+            tensor<fp16, [1, 16, ?, 64]> var_4461_cast_fp16 = matmul(transpose_x = var_4461_transpose_x_0, transpose_y = var_4461_transpose_y_0, x = var_4459_cast_fp16, y = v_205_cast_fp16)[name = string("op_4461_cast_fp16")];
+            tensor<int32, [4]> var_4462 = const()[name = string("op_4462"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_457x = const()[name = string("concat_457x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4463_cast_fp16 = transpose(perm = var_4462, x = var_4461_cast_fp16)[name = string("transpose_317")];
+            tensor<fp16, [1, ?, 1024]> x_367_cast_fp16 = reshape(shape = concat_457x, x = var_4463_cast_fp16)[name = string("x_367_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4467_to_fp16 = const()[name = string("op_4467_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704797760)))];
+            tensor<fp16, [1024]> var_4468_to_fp16 = const()[name = string("op_4468_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706894976)))];
+            tensor<fp16, [1, ?, 1024]> linear_163_cast_fp16 = linear(bias = var_4468_to_fp16, weight = var_4467_to_fp16, x = x_367_cast_fp16)[name = string("linear_163_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_369_cast_fp16 = add(x = x_363_cast_fp16, y = linear_163_cast_fp16)[name = string("x_369_cast_fp16")];
+            tensor<int32, [1]> var_4475_axes_0 = const()[name = string("op_4475_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_20_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706897088)))];
+            tensor<fp16, [1024]> blocks_20_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706899200)))];
+            tensor<fp16, [1, ?, 1024]> var_4475_cast_fp16 = layer_norm(axes = var_4475_axes_0, beta = blocks_20_cross_attn_ln_bias_to_fp16, epsilon = var_4374_to_fp16, gamma = blocks_20_cross_attn_ln_weight_to_fp16, x = x_369_cast_fp16)[name = string("op_4475_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4484_to_fp16 = const()[name = string("op_4484_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706901312)))];
+            tensor<fp16, [1024]> var_4485_to_fp16 = const()[name = string("op_4485_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708998528)))];
+            tensor<fp16, [1, ?, 1024]> linear_164_cast_fp16 = linear(bias = var_4485_to_fp16, weight = var_4484_to_fp16, x = var_4475_cast_fp16)[name = string("linear_164_cast_fp16")];
+            tensor<int32, [3]> concat_458 = const()[name = string("concat_458"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_459 = const()[name = string("concat_459"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_207_internal_tensor_assign_1_stride_0 = const()[name = string("k_207_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_458, begin_mask = k_207_internal_tensor_assign_1_begin_mask_0, end = concat_459, end_mask = k_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_207_internal_tensor_assign_1_squeeze_mask_0, stride = k_207_internal_tensor_assign_1_stride_0, update = k_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("k_207_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_460 = const()[name = string("concat_460"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_461 = const()[name = string("concat_461"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_207_internal_tensor_assign_1_stride_0 = const()[name = string("v_207_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_460, begin_mask = v_207_internal_tensor_assign_1_begin_mask_0, end = concat_461, end_mask = v_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_207_internal_tensor_assign_1_squeeze_mask_0, stride = v_207_internal_tensor_assign_1_stride_0, update = v_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("v_207_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_462x = const()[name = string("concat_462x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4505_cast_fp16 = reshape(shape = concat_462x, x = linear_164_cast_fp16)[name = string("op_4505_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_167_cast_fp16 = mul(x = var_4505_cast_fp16, y = const_202_to_fp16)[name = string("q_167_cast_fp16")];
+            tensor<int32, [4]> var_4511 = const()[name = string("op_4511"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4512_cast_fp16 = reshape(shape = var_4511, x = k_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4512_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_209_cast_fp16 = mul(x = var_4512_cast_fp16, y = const_203_to_fp16)[name = string("k_209_cast_fp16")];
+            tensor<int32, [4]> var_4518 = const()[name = string("op_4518"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4519_cast_fp16 = reshape(shape = var_4518, x = v_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4519_cast_fp16")];
+            tensor<int32, [4]> var_4520 = const()[name = string("op_4520"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_125_transpose_x_0 = const()[name = string("qk_125_transpose_x_0"), val = bool(false)];
+            bool qk_125_transpose_y_0 = const()[name = string("qk_125_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_275_perm_0 = const()[name = string("transpose_275_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_276_perm_0 = const()[name = string("transpose_276_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_276 = transpose(perm = transpose_276_perm_0, x = k_209_cast_fp16)[name = string("transpose_314")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_275 = transpose(perm = transpose_275_perm_0, x = q_167_cast_fp16)[name = string("transpose_315")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_125_cast_fp16 = matmul(transpose_x = qk_125_transpose_x_0, transpose_y = qk_125_transpose_y_0, x = transpose_275, y = transpose_276)[name = string("qk_125_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_4524_cast_fp16 = softmax(axis = var_4368, x = qk_125_cast_fp16)[name = string("op_4524_cast_fp16")];
+            bool var_4526_transpose_x_0 = const()[name = string("op_4526_transpose_x_0"), val = bool(false)];
+            bool var_4526_transpose_y_0 = const()[name = string("op_4526_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_209_cast_fp16 = transpose(perm = var_4520, x = var_4519_cast_fp16)[name = string("transpose_316")];
+            tensor<fp16, [1, 16, ?, 64]> var_4526_cast_fp16 = matmul(transpose_x = var_4526_transpose_x_0, transpose_y = var_4526_transpose_y_0, x = var_4524_cast_fp16, y = v_209_cast_fp16)[name = string("op_4526_cast_fp16")];
+            tensor<int32, [4]> var_4527 = const()[name = string("op_4527"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_463x = const()[name = string("concat_463x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4528_cast_fp16 = transpose(perm = var_4527, x = var_4526_cast_fp16)[name = string("transpose_313")];
+            tensor<fp16, [1, ?, 1024]> x_373_cast_fp16 = reshape(shape = concat_463x, x = var_4528_cast_fp16)[name = string("x_373_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4532_to_fp16 = const()[name = string("op_4532_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(709000640)))];
+            tensor<fp16, [1024]> var_4533_to_fp16 = const()[name = string("op_4533_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711097856)))];
+            tensor<fp16, [1, ?, 1024]> linear_165_cast_fp16 = linear(bias = var_4533_to_fp16, weight = var_4532_to_fp16, x = x_373_cast_fp16)[name = string("linear_165_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_375_cast_fp16 = add(x = x_369_cast_fp16, y = linear_165_cast_fp16)[name = string("x_375_cast_fp16")];
+            tensor<int32, [1]> var_4540_axes_0 = const()[name = string("op_4540_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711099968)))];
+            tensor<fp16, [1024]> blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711102080)))];
+            tensor<fp16, [1, ?, 1024]> var_4540_cast_fp16 = layer_norm(axes = var_4540_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_4374_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_375_cast_fp16)[name = string("op_4540_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_4549_to_fp16 = const()[name = string("op_4549_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711104192)))];
+            tensor<fp16, [4096]> var_4550_to_fp16 = const()[name = string("op_4550_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719492864)))];
+            tensor<fp16, [1, ?, 4096]> linear_166_cast_fp16 = linear(bias = var_4550_to_fp16, weight = var_4549_to_fp16, x = var_4540_cast_fp16)[name = string("linear_166_cast_fp16")];
+            string x_379_mode_0 = const()[name = string("x_379_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_379_cast_fp16 = gelu(mode = x_379_mode_0, x = linear_166_cast_fp16)[name = string("x_379_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_4555_to_fp16 = const()[name = string("op_4555_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719501120)))];
+            tensor<fp16, [1024]> var_4556_to_fp16 = const()[name = string("op_4556_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727889792)))];
+            tensor<fp16, [1, ?, 1024]> linear_167_cast_fp16 = linear(bias = var_4556_to_fp16, weight = var_4555_to_fp16, x = x_379_cast_fp16)[name = string("linear_167_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_381_cast_fp16 = add(x = x_375_cast_fp16, y = linear_167_cast_fp16)[name = string("x_381_cast_fp16")];
+            tensor<int32, [4]> k_cache_85_begin_0 = const()[name = string("k_cache_85_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_85_end_0 = const()[name = string("k_cache_85_end_0"), val = tensor<int32, [4]>([22, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_85_end_mask_0 = const()[name = string("k_cache_85_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_85_squeeze_mask_0 = const()[name = string("k_cache_85_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_85_cast_fp16 = slice_by_index(begin = k_cache_85_begin_0, end = k_cache_85_end_0, end_mask = k_cache_85_end_mask_0, squeeze_mask = k_cache_85_squeeze_mask_0, x = coreml_update_state_88)[name = string("k_cache_85_cast_fp16")];
+            tensor<int32, [4]> v_cache_85_begin_0 = const()[name = string("v_cache_85_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_85_end_0 = const()[name = string("v_cache_85_end_0"), val = tensor<int32, [4]>([22, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_85_end_mask_0 = const()[name = string("v_cache_85_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_85_squeeze_mask_0 = const()[name = string("v_cache_85_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_85_cast_fp16 = slice_by_index(begin = v_cache_85_begin_0, end = v_cache_85_end_0, end_mask = v_cache_85_end_mask_0, squeeze_mask = v_cache_85_squeeze_mask_0, x = coreml_update_state_89)[name = string("v_cache_85_cast_fp16")];
+            tensor<int32, [4]> k_cache_87_begin_0 = const()[name = string("k_cache_87_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_87_end_0 = const()[name = string("k_cache_87_end_0"), val = tensor<int32, [4]>([22, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_87_end_mask_0 = const()[name = string("k_cache_87_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_87_squeeze_mask_0 = const()[name = string("k_cache_87_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_87_cast_fp16 = slice_by_index(begin = k_cache_87_begin_0, end = k_cache_87_end_0, end_mask = k_cache_87_end_mask_0, squeeze_mask = k_cache_87_squeeze_mask_0, x = read_state_2)[name = string("k_cache_87_cast_fp16")];
+            tensor<int32, [4]> v_cache_87_begin_0 = const()[name = string("v_cache_87_begin_0"), val = tensor<int32, [4]>([21, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_87_end_0 = const()[name = string("v_cache_87_end_0"), val = tensor<int32, [4]>([22, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_87_end_mask_0 = const()[name = string("v_cache_87_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_87_squeeze_mask_0 = const()[name = string("v_cache_87_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_87_cast_fp16 = slice_by_index(begin = v_cache_87_begin_0, end = v_cache_87_end_0, end_mask = v_cache_87_end_mask_0, squeeze_mask = v_cache_87_squeeze_mask_0, x = read_state_3)[name = string("v_cache_87_cast_fp16")];
+            int32 var_4579 = const()[name = string("op_4579"), val = int32(-1)];
+            tensor<int32, [1]> var_4597_axes_0 = const()[name = string("op_4597_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727891904)))];
+            tensor<fp16, [1024]> blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727894016)))];
+            fp16 var_4585_to_fp16 = const()[name = string("op_4585_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_4597_cast_fp16 = layer_norm(axes = var_4597_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_4585_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_381_cast_fp16)[name = string("op_4597_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4608_to_fp16 = const()[name = string("op_4608_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727896128)))];
+            tensor<fp16, [1024]> var_4609_to_fp16 = const()[name = string("op_4609_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729993344)))];
+            tensor<fp16, [1, ?, 1024]> linear_168_cast_fp16 = linear(bias = var_4609_to_fp16, weight = var_4608_to_fp16, x = var_4597_cast_fp16)[name = string("linear_168_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4612_to_fp16 = const()[name = string("op_4612_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729995456)))];
+            tensor<fp16, [1, ?, 1024]> linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4612_to_fp16, x = var_4597_cast_fp16)[name = string("linear_169_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4616_to_fp16 = const()[name = string("op_4616_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732092672)))];
+            tensor<fp16, [1024]> var_4617_to_fp16 = const()[name = string("op_4617_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(734189888)))];
+            tensor<fp16, [1, ?, 1024]> linear_170_cast_fp16 = linear(bias = var_4617_to_fp16, weight = var_4616_to_fp16, x = var_4597_cast_fp16)[name = string("linear_170_cast_fp16")];
+            tensor<int32, [3]> var_4619_shape_cast_fp16 = shape(x = linear_168_cast_fp16)[name = string("op_4619_shape_cast_fp16")];
+            int32 gather_254_axis_0 = const()[name = string("gather_254_axis_0"), val = int32(0)];
+            int32 gather_254_batch_dims_0 = const()[name = string("gather_254_batch_dims_0"), val = int32(0)];
+            bool gather_254_validate_indices_0 = const()[name = string("gather_254_validate_indices_0"), val = bool(false)];
+            string var_4619_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4619_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_254_to_uint16 = const()[name = string("select_254_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4619_shape_cast_fp16_to_uint16 = cast(dtype = var_4619_shape_cast_fp16_to_uint16_dtype_0, x = var_4619_shape_cast_fp16)[name = string("cast_252")];
+            uint16 gather_254_cast_uint16 = gather(axis = gather_254_axis_0, batch_dims = gather_254_batch_dims_0, indices = select_254_to_uint16, validate_indices = gather_254_validate_indices_0, x = var_4619_shape_cast_fp16_to_uint16)[name = string("gather_254_cast_uint16")];
+            string gather_254_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_254_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_254_cast_uint16_to_int32 = cast(dtype = gather_254_cast_uint16_to_int32_dtype_0, x = gather_254_cast_uint16)[name = string("cast_251")];
+            int32 end_step_45 = add(x = offset, y = gather_254_cast_uint16_to_int32)[name = string("end_step_45")];
+            tensor<int32, [1]> expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_338 = const()[name = string("expand_dims_338"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_339_axes_0 = const()[name = string("expand_dims_339_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_339 = expand_dims(axes = expand_dims_339_axes_0, x = end_step_45)[name = string("expand_dims_339")];
+            tensor<int32, [1]> concat_466_values0_0 = const()[name = string("concat_466_values0_0"), val = tensor<int32, [1]>([21])];
+            int32 concat_466_axis_0 = const()[name = string("concat_466_axis_0"), val = int32(0)];
+            bool concat_466_interleave_0 = const()[name = string("concat_466_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_466 = concat(axis = concat_466_axis_0, interleave = concat_466_interleave_0, values = (concat_466_values0_0, expand_dims_336, expand_dims_1, expand_dims_338))[name = string("concat_466")];
+            tensor<int32, [1]> concat_467_values0_0 = const()[name = string("concat_467_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_467_values1_0 = const()[name = string("concat_467_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_467_values3_0 = const()[name = string("concat_467_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_467_axis_0 = const()[name = string("concat_467_axis_0"), val = int32(0)];
+            bool concat_467_interleave_0 = const()[name = string("concat_467_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_467 = concat(axis = concat_467_axis_0, interleave = concat_467_interleave_0, values = (concat_467_values0_0, concat_467_values1_0, expand_dims_339, concat_467_values3_0))[name = string("concat_467")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = k_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = k_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_22_stride_0, update = linear_169_cast_fp16, x = coreml_update_state_88)[name = string("k_cache1_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_22_cast_fp16, input = k_cache1)[name = string("coreml_update_state_90_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_90 = read_state(input = k_cache1)[name = string("coreml_update_state_90")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_22_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = v_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = v_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_22_stride_0, update = linear_170_cast_fp16, x = coreml_update_state_89)[name = string("v_cache1_internal_tensor_assign_22_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_22_cast_fp16, input = v_cache1)[name = string("coreml_update_state_91_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_91 = read_state(input = v_cache1)[name = string("coreml_update_state_91")];
+            int32 concat_472_values0_0 = const()[name = string("concat_472_values0_0"), val = int32(1)];
+            int32 concat_472_values2_0 = const()[name = string("concat_472_values2_0"), val = int32(1024)];
+            int32 concat_472_axis_0 = const()[name = string("concat_472_axis_0"), val = int32(0)];
+            bool concat_472_interleave_0 = const()[name = string("concat_472_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_472 = concat(axis = concat_472_axis_0, interleave = concat_472_interleave_0, values = (concat_472_values0_0, end_step_45, concat_472_values2_0))[name = string("concat_472")];
+            tensor<int32, [3]> var_4635_begin_0 = const()[name = string("op_4635_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4635_end_mask_0 = const()[name = string("op_4635_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4635_cast_fp16 = slice_by_index(begin = var_4635_begin_0, end = concat_472, end_mask = var_4635_end_mask_0, x = k_cache_85_cast_fp16)[name = string("op_4635_cast_fp16")];
+            tensor<int32, [3]> var_4638_begin_0 = const()[name = string("op_4638_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4638_end_mask_0 = const()[name = string("op_4638_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4638_cast_fp16 = slice_by_index(begin = var_4638_begin_0, end = concat_472, end_mask = var_4638_end_mask_0, x = v_cache_85_cast_fp16)[name = string("op_4638_cast_fp16")];
+            tensor<int32, [4]> concat_474x = const()[name = string("concat_474x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4648_cast_fp16 = reshape(shape = concat_474x, x = linear_168_cast_fp16)[name = string("op_4648_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_171_cast_fp16 = mul(x = var_4648_cast_fp16, y = const_204_to_fp16)[name = string("q_171_cast_fp16")];
+            tensor<int32, [4]> concat_475x = const()[name = string("concat_475x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4655_cast_fp16 = reshape(shape = concat_475x, x = var_4635_cast_fp16)[name = string("op_4655_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_215_cast_fp16 = mul(x = var_4655_cast_fp16, y = const_205_to_fp16)[name = string("k_215_cast_fp16")];
+            tensor<int32, [4]> concat_476x = const()[name = string("concat_476x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4662_cast_fp16 = reshape(shape = concat_476x, x = var_4638_cast_fp16)[name = string("op_4662_cast_fp16")];
+            tensor<int32, [4]> var_4663 = const()[name = string("op_4663"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_127_transpose_x_0 = const()[name = string("qk_127_transpose_x_0"), val = bool(false)];
+            bool qk_127_transpose_y_0 = const()[name = string("qk_127_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_277_perm_0 = const()[name = string("transpose_277_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_278_perm_0 = const()[name = string("transpose_278_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_278 = transpose(perm = transpose_278_perm_0, x = k_215_cast_fp16)[name = string("transpose_310")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_277 = transpose(perm = transpose_277_perm_0, x = q_171_cast_fp16)[name = string("transpose_311")];
+            tensor<fp16, [1, 16, ?, ?]> qk_127_cast_fp16 = matmul(transpose_x = qk_127_transpose_x_0, transpose_y = qk_127_transpose_y_0, x = transpose_277, y = transpose_278)[name = string("qk_127_cast_fp16")];
+            int32 concat_477_values1_0 = const()[name = string("concat_477_values1_0"), val = int32(448)];
+            int32 concat_477_axis_0 = const()[name = string("concat_477_axis_0"), val = int32(0)];
+            bool concat_477_interleave_0 = const()[name = string("concat_477_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_477 = concat(axis = concat_477_axis_0, interleave = concat_477_interleave_0, values = (gather_254_cast_uint16_to_int32, concat_477_values1_0))[name = string("concat_477")];
+            tensor<int32, [2]> var_4666_begin_0 = const()[name = string("op_4666_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4666_end_mask_0 = const()[name = string("op_4666_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4666_cast_fp16 = slice_by_index(begin = var_4666_begin_0, end = concat_477, end_mask = var_4666_end_mask_0, x = mask_to_fp16)[name = string("op_4666_cast_fp16")];
+            int32 concat_478_values0_0 = const()[name = string("concat_478_values0_0"), val = int32(0)];
+            int32 concat_478_axis_0 = const()[name = string("concat_478_axis_0"), val = int32(0)];
+            bool concat_478_interleave_0 = const()[name = string("concat_478_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_478 = concat(axis = concat_478_axis_0, interleave = concat_478_interleave_0, values = (concat_478_values0_0, gather_254_cast_uint16_to_int32))[name = string("concat_478")];
+            tensor<int32, [2]> var_4667_begin_0 = const()[name = string("op_4667_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4667_end_mask_0 = const()[name = string("op_4667_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4667_cast_fp16 = slice_by_index(begin = var_4667_begin_0, end = concat_478, end_mask = var_4667_end_mask_0, x = var_4666_cast_fp16)[name = string("op_4667_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_129_cast_fp16 = add(x = qk_127_cast_fp16, y = var_4667_cast_fp16)[name = string("qk_129_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_4670_cast_fp16 = softmax(axis = var_4579, x = qk_129_cast_fp16)[name = string("op_4670_cast_fp16")];
+            bool var_4672_transpose_x_0 = const()[name = string("op_4672_transpose_x_0"), val = bool(false)];
+            bool var_4672_transpose_y_0 = const()[name = string("op_4672_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_215_cast_fp16 = transpose(perm = var_4663, x = var_4662_cast_fp16)[name = string("transpose_312")];
+            tensor<fp16, [1, 16, ?, 64]> var_4672_cast_fp16 = matmul(transpose_x = var_4672_transpose_x_0, transpose_y = var_4672_transpose_y_0, x = var_4670_cast_fp16, y = v_215_cast_fp16)[name = string("op_4672_cast_fp16")];
+            tensor<int32, [4]> var_4673 = const()[name = string("op_4673"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_479x = const()[name = string("concat_479x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4674_cast_fp16 = transpose(perm = var_4673, x = var_4672_cast_fp16)[name = string("transpose_309")];
+            tensor<fp16, [1, ?, 1024]> x_385_cast_fp16 = reshape(shape = concat_479x, x = var_4674_cast_fp16)[name = string("x_385_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4678_to_fp16 = const()[name = string("op_4678_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(734192000)))];
+            tensor<fp16, [1024]> var_4679_to_fp16 = const()[name = string("op_4679_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736289216)))];
+            tensor<fp16, [1, ?, 1024]> linear_171_cast_fp16 = linear(bias = var_4679_to_fp16, weight = var_4678_to_fp16, x = x_385_cast_fp16)[name = string("linear_171_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_387_cast_fp16 = add(x = x_381_cast_fp16, y = linear_171_cast_fp16)[name = string("x_387_cast_fp16")];
+            tensor<int32, [1]> var_4686_axes_0 = const()[name = string("op_4686_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_21_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736291328)))];
+            tensor<fp16, [1024]> blocks_21_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736293440)))];
+            tensor<fp16, [1, ?, 1024]> var_4686_cast_fp16 = layer_norm(axes = var_4686_axes_0, beta = blocks_21_cross_attn_ln_bias_to_fp16, epsilon = var_4585_to_fp16, gamma = blocks_21_cross_attn_ln_weight_to_fp16, x = x_387_cast_fp16)[name = string("op_4686_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736295552)))];
+            tensor<fp16, [1024]> var_4696_to_fp16 = const()[name = string("op_4696_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738392768)))];
+            tensor<fp16, [1, ?, 1024]> linear_172_cast_fp16 = linear(bias = var_4696_to_fp16, weight = var_4695_to_fp16, x = var_4686_cast_fp16)[name = string("linear_172_cast_fp16")];
+            tensor<int32, [3]> concat_480 = const()[name = string("concat_480"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_481 = const()[name = string("concat_481"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_217_internal_tensor_assign_1_stride_0 = const()[name = string("k_217_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_480, begin_mask = k_217_internal_tensor_assign_1_begin_mask_0, end = concat_481, end_mask = k_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_217_internal_tensor_assign_1_squeeze_mask_0, stride = k_217_internal_tensor_assign_1_stride_0, update = k_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("k_217_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_482 = const()[name = string("concat_482"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_483 = const()[name = string("concat_483"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_217_internal_tensor_assign_1_stride_0 = const()[name = string("v_217_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_482, begin_mask = v_217_internal_tensor_assign_1_begin_mask_0, end = concat_483, end_mask = v_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_217_internal_tensor_assign_1_squeeze_mask_0, stride = v_217_internal_tensor_assign_1_stride_0, update = v_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("v_217_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_484x = const()[name = string("concat_484x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4716_cast_fp16 = reshape(shape = concat_484x, x = linear_172_cast_fp16)[name = string("op_4716_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_175_cast_fp16 = mul(x = var_4716_cast_fp16, y = const_206_to_fp16)[name = string("q_175_cast_fp16")];
+            tensor<int32, [4]> var_4722 = const()[name = string("op_4722"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4723_cast_fp16 = reshape(shape = var_4722, x = k_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4723_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_219_cast_fp16 = mul(x = var_4723_cast_fp16, y = const_207_to_fp16)[name = string("k_219_cast_fp16")];
+            tensor<int32, [4]> var_4729 = const()[name = string("op_4729"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4730_cast_fp16 = reshape(shape = var_4729, x = v_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4730_cast_fp16")];
+            tensor<int32, [4]> var_4731 = const()[name = string("op_4731"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_131_transpose_x_0 = const()[name = string("qk_131_transpose_x_0"), val = bool(false)];
+            bool qk_131_transpose_y_0 = const()[name = string("qk_131_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_279_perm_0 = const()[name = string("transpose_279_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_280_perm_0 = const()[name = string("transpose_280_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_280 = transpose(perm = transpose_280_perm_0, x = k_219_cast_fp16)[name = string("transpose_306")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_279 = transpose(perm = transpose_279_perm_0, x = q_175_cast_fp16)[name = string("transpose_307")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_131_cast_fp16 = matmul(transpose_x = qk_131_transpose_x_0, transpose_y = qk_131_transpose_y_0, x = transpose_279, y = transpose_280)[name = string("qk_131_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_4735_cast_fp16 = softmax(axis = var_4579, x = qk_131_cast_fp16)[name = string("op_4735_cast_fp16")];
+            bool var_4737_transpose_x_0 = const()[name = string("op_4737_transpose_x_0"), val = bool(false)];
+            bool var_4737_transpose_y_0 = const()[name = string("op_4737_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_219_cast_fp16 = transpose(perm = var_4731, x = var_4730_cast_fp16)[name = string("transpose_308")];
+            tensor<fp16, [1, 16, ?, 64]> var_4737_cast_fp16 = matmul(transpose_x = var_4737_transpose_x_0, transpose_y = var_4737_transpose_y_0, x = var_4735_cast_fp16, y = v_219_cast_fp16)[name = string("op_4737_cast_fp16")];
+            tensor<int32, [4]> var_4738 = const()[name = string("op_4738"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_485x = const()[name = string("concat_485x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4739_cast_fp16 = transpose(perm = var_4738, x = var_4737_cast_fp16)[name = string("transpose_305")];
+            tensor<fp16, [1, ?, 1024]> x_391_cast_fp16 = reshape(shape = concat_485x, x = var_4739_cast_fp16)[name = string("x_391_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4743_to_fp16 = const()[name = string("op_4743_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738394880)))];
+            tensor<fp16, [1024]> var_4744_to_fp16 = const()[name = string("op_4744_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740492096)))];
+            tensor<fp16, [1, ?, 1024]> linear_173_cast_fp16 = linear(bias = var_4744_to_fp16, weight = var_4743_to_fp16, x = x_391_cast_fp16)[name = string("linear_173_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_393_cast_fp16 = add(x = x_387_cast_fp16, y = linear_173_cast_fp16)[name = string("x_393_cast_fp16")];
+            tensor<int32, [1]> var_4751_axes_0 = const()[name = string("op_4751_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740494208)))];
+            tensor<fp16, [1024]> blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740496320)))];
+            tensor<fp16, [1, ?, 1024]> var_4751_cast_fp16 = layer_norm(axes = var_4751_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_4585_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_393_cast_fp16)[name = string("op_4751_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_4760_to_fp16 = const()[name = string("op_4760_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740498432)))];
+            tensor<fp16, [4096]> var_4761_to_fp16 = const()[name = string("op_4761_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748887104)))];
+            tensor<fp16, [1, ?, 4096]> linear_174_cast_fp16 = linear(bias = var_4761_to_fp16, weight = var_4760_to_fp16, x = var_4751_cast_fp16)[name = string("linear_174_cast_fp16")];
+            string x_397_mode_0 = const()[name = string("x_397_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_397_cast_fp16 = gelu(mode = x_397_mode_0, x = linear_174_cast_fp16)[name = string("x_397_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_4766_to_fp16 = const()[name = string("op_4766_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748895360)))];
+            tensor<fp16, [1024]> var_4767_to_fp16 = const()[name = string("op_4767_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757284032)))];
+            tensor<fp16, [1, ?, 1024]> linear_175_cast_fp16 = linear(bias = var_4767_to_fp16, weight = var_4766_to_fp16, x = x_397_cast_fp16)[name = string("linear_175_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_399_cast_fp16 = add(x = x_393_cast_fp16, y = linear_175_cast_fp16)[name = string("x_399_cast_fp16")];
+            tensor<int32, [4]> k_cache_89_begin_0 = const()[name = string("k_cache_89_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_89_end_0 = const()[name = string("k_cache_89_end_0"), val = tensor<int32, [4]>([23, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_89_end_mask_0 = const()[name = string("k_cache_89_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_89_squeeze_mask_0 = const()[name = string("k_cache_89_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_89_cast_fp16 = slice_by_index(begin = k_cache_89_begin_0, end = k_cache_89_end_0, end_mask = k_cache_89_end_mask_0, squeeze_mask = k_cache_89_squeeze_mask_0, x = coreml_update_state_90)[name = string("k_cache_89_cast_fp16")];
+            tensor<int32, [4]> v_cache_89_begin_0 = const()[name = string("v_cache_89_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_89_end_0 = const()[name = string("v_cache_89_end_0"), val = tensor<int32, [4]>([23, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_89_end_mask_0 = const()[name = string("v_cache_89_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_89_squeeze_mask_0 = const()[name = string("v_cache_89_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_89_cast_fp16 = slice_by_index(begin = v_cache_89_begin_0, end = v_cache_89_end_0, end_mask = v_cache_89_end_mask_0, squeeze_mask = v_cache_89_squeeze_mask_0, x = coreml_update_state_91)[name = string("v_cache_89_cast_fp16")];
+            tensor<int32, [4]> k_cache_91_begin_0 = const()[name = string("k_cache_91_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_91_end_0 = const()[name = string("k_cache_91_end_0"), val = tensor<int32, [4]>([23, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_91_end_mask_0 = const()[name = string("k_cache_91_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_91_squeeze_mask_0 = const()[name = string("k_cache_91_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_91_cast_fp16 = slice_by_index(begin = k_cache_91_begin_0, end = k_cache_91_end_0, end_mask = k_cache_91_end_mask_0, squeeze_mask = k_cache_91_squeeze_mask_0, x = read_state_2)[name = string("k_cache_91_cast_fp16")];
+            tensor<int32, [4]> v_cache_91_begin_0 = const()[name = string("v_cache_91_begin_0"), val = tensor<int32, [4]>([22, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_91_end_0 = const()[name = string("v_cache_91_end_0"), val = tensor<int32, [4]>([23, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_91_end_mask_0 = const()[name = string("v_cache_91_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_91_squeeze_mask_0 = const()[name = string("v_cache_91_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_91_cast_fp16 = slice_by_index(begin = v_cache_91_begin_0, end = v_cache_91_end_0, end_mask = v_cache_91_end_mask_0, squeeze_mask = v_cache_91_squeeze_mask_0, x = read_state_3)[name = string("v_cache_91_cast_fp16")];
+            int32 var_4790 = const()[name = string("op_4790"), val = int32(-1)];
+            tensor<int32, [1]> var_4808_axes_0 = const()[name = string("op_4808_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757286144)))];
+            tensor<fp16, [1024]> blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757288256)))];
+            fp16 var_4796_to_fp16 = const()[name = string("op_4796_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_4808_cast_fp16 = layer_norm(axes = var_4808_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_4796_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_399_cast_fp16)[name = string("op_4808_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4819_to_fp16 = const()[name = string("op_4819_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757290368)))];
+            tensor<fp16, [1024]> var_4820_to_fp16 = const()[name = string("op_4820_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(759387584)))];
+            tensor<fp16, [1, ?, 1024]> linear_176_cast_fp16 = linear(bias = var_4820_to_fp16, weight = var_4819_to_fp16, x = var_4808_cast_fp16)[name = string("linear_176_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4823_to_fp16 = const()[name = string("op_4823_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(759389696)))];
+            tensor<fp16, [1, ?, 1024]> linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4823_to_fp16, x = var_4808_cast_fp16)[name = string("linear_177_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4827_to_fp16 = const()[name = string("op_4827_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(761486912)))];
+            tensor<fp16, [1024]> var_4828_to_fp16 = const()[name = string("op_4828_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(763584128)))];
+            tensor<fp16, [1, ?, 1024]> linear_178_cast_fp16 = linear(bias = var_4828_to_fp16, weight = var_4827_to_fp16, x = var_4808_cast_fp16)[name = string("linear_178_cast_fp16")];
+            tensor<int32, [3]> var_4830_shape_cast_fp16 = shape(x = linear_176_cast_fp16)[name = string("op_4830_shape_cast_fp16")];
+            int32 gather_266_axis_0 = const()[name = string("gather_266_axis_0"), val = int32(0)];
+            int32 gather_266_batch_dims_0 = const()[name = string("gather_266_batch_dims_0"), val = int32(0)];
+            bool gather_266_validate_indices_0 = const()[name = string("gather_266_validate_indices_0"), val = bool(false)];
+            string var_4830_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4830_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_266_to_uint16 = const()[name = string("select_266_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_4830_shape_cast_fp16_to_uint16 = cast(dtype = var_4830_shape_cast_fp16_to_uint16_dtype_0, x = var_4830_shape_cast_fp16)[name = string("cast_250")];
+            uint16 gather_266_cast_uint16 = gather(axis = gather_266_axis_0, batch_dims = gather_266_batch_dims_0, indices = select_266_to_uint16, validate_indices = gather_266_validate_indices_0, x = var_4830_shape_cast_fp16_to_uint16)[name = string("gather_266_cast_uint16")];
+            string gather_266_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_266_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_266_cast_uint16_to_int32 = cast(dtype = gather_266_cast_uint16_to_int32_dtype_0, x = gather_266_cast_uint16)[name = string("cast_249")];
+            int32 end_step_47 = add(x = offset, y = gather_266_cast_uint16_to_int32)[name = string("end_step_47")];
+            tensor<int32, [1]> expand_dims_352 = const()[name = string("expand_dims_352"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_354 = const()[name = string("expand_dims_354"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_355_axes_0 = const()[name = string("expand_dims_355_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_355 = expand_dims(axes = expand_dims_355_axes_0, x = end_step_47)[name = string("expand_dims_355")];
+            tensor<int32, [1]> concat_488_values0_0 = const()[name = string("concat_488_values0_0"), val = tensor<int32, [1]>([22])];
+            int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)];
+            bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (concat_488_values0_0, expand_dims_352, expand_dims_1, expand_dims_354))[name = string("concat_488")];
+            tensor<int32, [1]> concat_489_values0_0 = const()[name = string("concat_489_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)];
+            bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (concat_489_values0_0, concat_489_values1_0, expand_dims_355, concat_489_values3_0))[name = string("concat_489")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = k_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = k_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_23_stride_0, update = linear_177_cast_fp16, x = coreml_update_state_90)[name = string("k_cache1_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_23_cast_fp16, input = k_cache1)[name = string("coreml_update_state_92_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_92 = read_state(input = k_cache1)[name = string("coreml_update_state_92")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_23_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = v_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = v_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_23_stride_0, update = linear_178_cast_fp16, x = coreml_update_state_91)[name = string("v_cache1_internal_tensor_assign_23_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_23_cast_fp16, input = v_cache1)[name = string("coreml_update_state_93_write_state")];
+            tensor<fp16, [24, 1, 448, 1024]> coreml_update_state_93 = read_state(input = v_cache1)[name = string("coreml_update_state_93")];
+            int32 concat_494_values0_0 = const()[name = string("concat_494_values0_0"), val = int32(1)];
+            int32 concat_494_values2_0 = const()[name = string("concat_494_values2_0"), val = int32(1024)];
+            int32 concat_494_axis_0 = const()[name = string("concat_494_axis_0"), val = int32(0)];
+            bool concat_494_interleave_0 = const()[name = string("concat_494_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_494 = concat(axis = concat_494_axis_0, interleave = concat_494_interleave_0, values = (concat_494_values0_0, end_step_47, concat_494_values2_0))[name = string("concat_494")];
+            tensor<int32, [3]> var_4846_begin_0 = const()[name = string("op_4846_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4846_end_mask_0 = const()[name = string("op_4846_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4846_cast_fp16 = slice_by_index(begin = var_4846_begin_0, end = concat_494, end_mask = var_4846_end_mask_0, x = k_cache_89_cast_fp16)[name = string("op_4846_cast_fp16")];
+            tensor<int32, [3]> var_4849_begin_0 = const()[name = string("op_4849_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_4849_end_mask_0 = const()[name = string("op_4849_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_4849_cast_fp16 = slice_by_index(begin = var_4849_begin_0, end = concat_494, end_mask = var_4849_end_mask_0, x = v_cache_89_cast_fp16)[name = string("op_4849_cast_fp16")];
+            tensor<int32, [4]> concat_496x = const()[name = string("concat_496x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4859_cast_fp16 = reshape(shape = concat_496x, x = linear_176_cast_fp16)[name = string("op_4859_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_179_cast_fp16 = mul(x = var_4859_cast_fp16, y = const_208_to_fp16)[name = string("q_179_cast_fp16")];
+            tensor<int32, [4]> concat_497x = const()[name = string("concat_497x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4866_cast_fp16 = reshape(shape = concat_497x, x = var_4846_cast_fp16)[name = string("op_4866_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_225_cast_fp16 = mul(x = var_4866_cast_fp16, y = const_209_to_fp16)[name = string("k_225_cast_fp16")];
+            tensor<int32, [4]> concat_498x = const()[name = string("concat_498x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4873_cast_fp16 = reshape(shape = concat_498x, x = var_4849_cast_fp16)[name = string("op_4873_cast_fp16")];
+            tensor<int32, [4]> var_4874 = const()[name = string("op_4874"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_133_transpose_x_0 = const()[name = string("qk_133_transpose_x_0"), val = bool(false)];
+            bool qk_133_transpose_y_0 = const()[name = string("qk_133_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_281_perm_0 = const()[name = string("transpose_281_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_282_perm_0 = const()[name = string("transpose_282_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_282 = transpose(perm = transpose_282_perm_0, x = k_225_cast_fp16)[name = string("transpose_302")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_281 = transpose(perm = transpose_281_perm_0, x = q_179_cast_fp16)[name = string("transpose_303")];
+            tensor<fp16, [1, 16, ?, ?]> qk_133_cast_fp16 = matmul(transpose_x = qk_133_transpose_x_0, transpose_y = qk_133_transpose_y_0, x = transpose_281, y = transpose_282)[name = string("qk_133_cast_fp16")];
+            int32 concat_499_values1_0 = const()[name = string("concat_499_values1_0"), val = int32(448)];
+            int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)];
+            bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (gather_266_cast_uint16_to_int32, concat_499_values1_0))[name = string("concat_499")];
+            tensor<int32, [2]> var_4877_begin_0 = const()[name = string("op_4877_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4877_end_mask_0 = const()[name = string("op_4877_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_4877_cast_fp16 = slice_by_index(begin = var_4877_begin_0, end = concat_499, end_mask = var_4877_end_mask_0, x = mask_to_fp16)[name = string("op_4877_cast_fp16")];
+            int32 concat_500_values0_0 = const()[name = string("concat_500_values0_0"), val = int32(0)];
+            int32 concat_500_axis_0 = const()[name = string("concat_500_axis_0"), val = int32(0)];
+            bool concat_500_interleave_0 = const()[name = string("concat_500_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_500 = concat(axis = concat_500_axis_0, interleave = concat_500_interleave_0, values = (concat_500_values0_0, gather_266_cast_uint16_to_int32))[name = string("concat_500")];
+            tensor<int32, [2]> var_4878_begin_0 = const()[name = string("op_4878_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_4878_end_mask_0 = const()[name = string("op_4878_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_4878_cast_fp16 = slice_by_index(begin = var_4878_begin_0, end = concat_500, end_mask = var_4878_end_mask_0, x = var_4877_cast_fp16)[name = string("op_4878_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_135_cast_fp16 = add(x = qk_133_cast_fp16, y = var_4878_cast_fp16)[name = string("qk_135_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_4881_cast_fp16 = softmax(axis = var_4790, x = qk_135_cast_fp16)[name = string("op_4881_cast_fp16")];
+            bool var_4883_transpose_x_0 = const()[name = string("op_4883_transpose_x_0"), val = bool(false)];
+            bool var_4883_transpose_y_0 = const()[name = string("op_4883_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_225_cast_fp16 = transpose(perm = var_4874, x = var_4873_cast_fp16)[name = string("transpose_304")];
+            tensor<fp16, [1, 16, ?, 64]> var_4883_cast_fp16 = matmul(transpose_x = var_4883_transpose_x_0, transpose_y = var_4883_transpose_y_0, x = var_4881_cast_fp16, y = v_225_cast_fp16)[name = string("op_4883_cast_fp16")];
+            tensor<int32, [4]> var_4884 = const()[name = string("op_4884"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_501x = const()[name = string("concat_501x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4885_cast_fp16 = transpose(perm = var_4884, x = var_4883_cast_fp16)[name = string("transpose_301")];
+            tensor<fp16, [1, ?, 1024]> x_403_cast_fp16 = reshape(shape = concat_501x, x = var_4885_cast_fp16)[name = string("x_403_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4889_to_fp16 = const()[name = string("op_4889_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(763586240)))];
+            tensor<fp16, [1024]> var_4890_to_fp16 = const()[name = string("op_4890_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765683456)))];
+            tensor<fp16, [1, ?, 1024]> linear_179_cast_fp16 = linear(bias = var_4890_to_fp16, weight = var_4889_to_fp16, x = x_403_cast_fp16)[name = string("linear_179_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_405_cast_fp16 = add(x = x_399_cast_fp16, y = linear_179_cast_fp16)[name = string("x_405_cast_fp16")];
+            tensor<int32, [1]> var_4897_axes_0 = const()[name = string("op_4897_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_22_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765685568)))];
+            tensor<fp16, [1024]> blocks_22_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765687680)))];
+            tensor<fp16, [1, ?, 1024]> var_4897_cast_fp16 = layer_norm(axes = var_4897_axes_0, beta = blocks_22_cross_attn_ln_bias_to_fp16, epsilon = var_4796_to_fp16, gamma = blocks_22_cross_attn_ln_weight_to_fp16, x = x_405_cast_fp16)[name = string("op_4897_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4906_to_fp16 = const()[name = string("op_4906_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765689792)))];
+            tensor<fp16, [1024]> var_4907_to_fp16 = const()[name = string("op_4907_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767787008)))];
+            tensor<fp16, [1, ?, 1024]> linear_180_cast_fp16 = linear(bias = var_4907_to_fp16, weight = var_4906_to_fp16, x = var_4897_cast_fp16)[name = string("linear_180_cast_fp16")];
+            tensor<int32, [3]> concat_502 = const()[name = string("concat_502"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_503 = const()[name = string("concat_503"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_227_internal_tensor_assign_1_stride_0 = const()[name = string("k_227_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_502, begin_mask = k_227_internal_tensor_assign_1_begin_mask_0, end = concat_503, end_mask = k_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_227_internal_tensor_assign_1_squeeze_mask_0, stride = k_227_internal_tensor_assign_1_stride_0, update = k_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("k_227_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_504 = const()[name = string("concat_504"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_505 = const()[name = string("concat_505"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_227_internal_tensor_assign_1_stride_0 = const()[name = string("v_227_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_504, begin_mask = v_227_internal_tensor_assign_1_begin_mask_0, end = concat_505, end_mask = v_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_227_internal_tensor_assign_1_squeeze_mask_0, stride = v_227_internal_tensor_assign_1_stride_0, update = v_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("v_227_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_506x = const()[name = string("concat_506x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_4927_cast_fp16 = reshape(shape = concat_506x, x = linear_180_cast_fp16)[name = string("op_4927_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_183_cast_fp16 = mul(x = var_4927_cast_fp16, y = const_210_to_fp16)[name = string("q_183_cast_fp16")];
+            tensor<int32, [4]> var_4933 = const()[name = string("op_4933"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4934_cast_fp16 = reshape(shape = var_4933, x = k_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4934_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_229_cast_fp16 = mul(x = var_4934_cast_fp16, y = const_211_to_fp16)[name = string("k_229_cast_fp16")];
+            tensor<int32, [4]> var_4940 = const()[name = string("op_4940"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_4941_cast_fp16 = reshape(shape = var_4940, x = v_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4941_cast_fp16")];
+            tensor<int32, [4]> var_4942 = const()[name = string("op_4942"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_137_transpose_x_0 = const()[name = string("qk_137_transpose_x_0"), val = bool(false)];
+            bool qk_137_transpose_y_0 = const()[name = string("qk_137_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_283_perm_0 = const()[name = string("transpose_283_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_284_perm_0 = const()[name = string("transpose_284_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_284 = transpose(perm = transpose_284_perm_0, x = k_229_cast_fp16)[name = string("transpose_298")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_283 = transpose(perm = transpose_283_perm_0, x = q_183_cast_fp16)[name = string("transpose_299")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_137_cast_fp16 = matmul(transpose_x = qk_137_transpose_x_0, transpose_y = qk_137_transpose_y_0, x = transpose_283, y = transpose_284)[name = string("qk_137_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_4946_cast_fp16 = softmax(axis = var_4790, x = qk_137_cast_fp16)[name = string("op_4946_cast_fp16")];
+            bool var_4948_transpose_x_0 = const()[name = string("op_4948_transpose_x_0"), val = bool(false)];
+            bool var_4948_transpose_y_0 = const()[name = string("op_4948_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_229_cast_fp16 = transpose(perm = var_4942, x = var_4941_cast_fp16)[name = string("transpose_300")];
+            tensor<fp16, [1, 16, ?, 64]> var_4948_cast_fp16 = matmul(transpose_x = var_4948_transpose_x_0, transpose_y = var_4948_transpose_y_0, x = var_4946_cast_fp16, y = v_229_cast_fp16)[name = string("op_4948_cast_fp16")];
+            tensor<int32, [4]> var_4949 = const()[name = string("op_4949"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_507x = const()[name = string("concat_507x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_4950_cast_fp16 = transpose(perm = var_4949, x = var_4948_cast_fp16)[name = string("transpose_297")];
+            tensor<fp16, [1, ?, 1024]> x_409_cast_fp16 = reshape(shape = concat_507x, x = var_4950_cast_fp16)[name = string("x_409_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_4954_to_fp16 = const()[name = string("op_4954_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767789120)))];
+            tensor<fp16, [1024]> var_4955_to_fp16 = const()[name = string("op_4955_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769886336)))];
+            tensor<fp16, [1, ?, 1024]> linear_181_cast_fp16 = linear(bias = var_4955_to_fp16, weight = var_4954_to_fp16, x = x_409_cast_fp16)[name = string("linear_181_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_411_cast_fp16 = add(x = x_405_cast_fp16, y = linear_181_cast_fp16)[name = string("x_411_cast_fp16")];
+            tensor<int32, [1]> var_4962_axes_0 = const()[name = string("op_4962_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769888448)))];
+            tensor<fp16, [1024]> blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769890560)))];
+            tensor<fp16, [1, ?, 1024]> var_4962_cast_fp16 = layer_norm(axes = var_4962_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_4796_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_411_cast_fp16)[name = string("op_4962_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_4971_to_fp16 = const()[name = string("op_4971_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769892672)))];
+            tensor<fp16, [4096]> var_4972_to_fp16 = const()[name = string("op_4972_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778281344)))];
+            tensor<fp16, [1, ?, 4096]> linear_182_cast_fp16 = linear(bias = var_4972_to_fp16, weight = var_4971_to_fp16, x = var_4962_cast_fp16)[name = string("linear_182_cast_fp16")];
+            string x_415_mode_0 = const()[name = string("x_415_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_415_cast_fp16 = gelu(mode = x_415_mode_0, x = linear_182_cast_fp16)[name = string("x_415_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_4977_to_fp16 = const()[name = string("op_4977_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778289600)))];
+            tensor<fp16, [1024]> var_4978_to_fp16 = const()[name = string("op_4978_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786678272)))];
+            tensor<fp16, [1, ?, 1024]> linear_183_cast_fp16 = linear(bias = var_4978_to_fp16, weight = var_4977_to_fp16, x = x_415_cast_fp16)[name = string("linear_183_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_417_cast_fp16 = add(x = x_411_cast_fp16, y = linear_183_cast_fp16)[name = string("x_417_cast_fp16")];
+            tensor<int32, [4]> k_cache_93_begin_0 = const()[name = string("k_cache_93_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_93_end_0 = const()[name = string("k_cache_93_end_0"), val = tensor<int32, [4]>([24, 1, 448, 1024])];
+            tensor<bool, [4]> k_cache_93_end_mask_0 = const()[name = string("k_cache_93_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_93_squeeze_mask_0 = const()[name = string("k_cache_93_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> k_cache_93_cast_fp16 = slice_by_index(begin = k_cache_93_begin_0, end = k_cache_93_end_0, end_mask = k_cache_93_end_mask_0, squeeze_mask = k_cache_93_squeeze_mask_0, x = coreml_update_state_92)[name = string("k_cache_93_cast_fp16")];
+            tensor<int32, [4]> v_cache_93_begin_0 = const()[name = string("v_cache_93_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_93_end_0 = const()[name = string("v_cache_93_end_0"), val = tensor<int32, [4]>([24, 1, 448, 1024])];
+            tensor<bool, [4]> v_cache_93_end_mask_0 = const()[name = string("v_cache_93_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_93_squeeze_mask_0 = const()[name = string("v_cache_93_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 1024]> v_cache_93_cast_fp16 = slice_by_index(begin = v_cache_93_begin_0, end = v_cache_93_end_0, end_mask = v_cache_93_end_mask_0, squeeze_mask = v_cache_93_squeeze_mask_0, x = coreml_update_state_93)[name = string("v_cache_93_cast_fp16")];
+            tensor<int32, [4]> k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor<int32, [4]>([24, 1, 1500, 1024])];
+            tensor<bool, [4]> k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")];
+            tensor<int32, [4]> v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor<int32, [4]>([23, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor<int32, [4]>([24, 1, 1500, 1024])];
+            tensor<bool, [4]> v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")];
+            int32 var_5001 = const()[name = string("op_5001"), val = int32(-1)];
+            tensor<int32, [1]> var_5019_axes_0 = const()[name = string("op_5019_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786680384)))];
+            tensor<fp16, [1024]> blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786682496)))];
+            fp16 var_5007_to_fp16 = const()[name = string("op_5007_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_5019_cast_fp16 = layer_norm(axes = var_5019_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_5007_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_417_cast_fp16)[name = string("op_5019_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_5030_to_fp16 = const()[name = string("op_5030_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786684608)))];
+            tensor<fp16, [1024]> var_5031_to_fp16 = const()[name = string("op_5031_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788781824)))];
+            tensor<fp16, [1, ?, 1024]> linear_184_cast_fp16 = linear(bias = var_5031_to_fp16, weight = var_5030_to_fp16, x = var_5019_cast_fp16)[name = string("linear_184_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_5034_to_fp16 = const()[name = string("op_5034_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788783936)))];
+            tensor<fp16, [1, ?, 1024]> linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5034_to_fp16, x = var_5019_cast_fp16)[name = string("linear_185_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_5038_to_fp16 = const()[name = string("op_5038_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790881152)))];
+            tensor<fp16, [1024]> var_5039_to_fp16 = const()[name = string("op_5039_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(792978368)))];
+            tensor<fp16, [1, ?, 1024]> linear_186_cast_fp16 = linear(bias = var_5039_to_fp16, weight = var_5038_to_fp16, x = var_5019_cast_fp16)[name = string("linear_186_cast_fp16")];
+            tensor<int32, [3]> var_5041_shape_cast_fp16 = shape(x = linear_184_cast_fp16)[name = string("op_5041_shape_cast_fp16")];
+            int32 gather_278_axis_0 = const()[name = string("gather_278_axis_0"), val = int32(0)];
+            int32 gather_278_batch_dims_0 = const()[name = string("gather_278_batch_dims_0"), val = int32(0)];
+            bool gather_278_validate_indices_0 = const()[name = string("gather_278_validate_indices_0"), val = bool(false)];
+            string var_5041_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5041_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_278_to_uint16 = const()[name = string("select_278_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_5041_shape_cast_fp16_to_uint16 = cast(dtype = var_5041_shape_cast_fp16_to_uint16_dtype_0, x = var_5041_shape_cast_fp16)[name = string("cast_248")];
+            uint16 gather_278_cast_uint16 = gather(axis = gather_278_axis_0, batch_dims = gather_278_batch_dims_0, indices = select_278_to_uint16, validate_indices = gather_278_validate_indices_0, x = var_5041_shape_cast_fp16_to_uint16)[name = string("gather_278_cast_uint16")];
+            string gather_278_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_278_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_278_cast_uint16_to_int32 = cast(dtype = gather_278_cast_uint16_to_int32_dtype_0, x = gather_278_cast_uint16)[name = string("cast_247")];
+            int32 end_step = add(x = offset, y = gather_278_cast_uint16_to_int32)[name = string("end_step")];
+            tensor<int32, [1]> expand_dims_368 = const()[name = string("expand_dims_368"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_370 = const()[name = string("expand_dims_370"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_371_axes_0 = const()[name = string("expand_dims_371_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_371 = expand_dims(axes = expand_dims_371_axes_0, x = end_step)[name = string("expand_dims_371")];
+            tensor<int32, [1]> concat_510_values0_0 = const()[name = string("concat_510_values0_0"), val = tensor<int32, [1]>([23])];
+            int32 concat_510_axis_0 = const()[name = string("concat_510_axis_0"), val = int32(0)];
+            bool concat_510_interleave_0 = const()[name = string("concat_510_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_510 = concat(axis = concat_510_axis_0, interleave = concat_510_interleave_0, values = (concat_510_values0_0, expand_dims_368, expand_dims_1, expand_dims_370))[name = string("concat_510")];
+            tensor<int32, [1]> concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_511_values3_0 = const()[name = string("concat_511_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)];
+            bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, expand_dims_371, concat_511_values3_0))[name = string("concat_511")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> k_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = k_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = k_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_24_stride_0, update = linear_185_cast_fp16, x = coreml_update_state_92)[name = string("k_cache1_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_24_cast_fp16, input = k_cache1)[name = string("coreml_update_state_94_write_state")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_24_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [24, 1, 448, 1024]> v_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = v_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = v_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_24_stride_0, update = linear_186_cast_fp16, x = coreml_update_state_93)[name = string("v_cache1_internal_tensor_assign_24_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_24_cast_fp16, input = v_cache1)[name = string("coreml_update_state_95_write_state")];
+            int32 concat_516_values0_0 = const()[name = string("concat_516_values0_0"), val = int32(1)];
+            int32 concat_516_values2_0 = const()[name = string("concat_516_values2_0"), val = int32(1024)];
+            int32 concat_516_axis_0 = const()[name = string("concat_516_axis_0"), val = int32(0)];
+            bool concat_516_interleave_0 = const()[name = string("concat_516_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_516 = concat(axis = concat_516_axis_0, interleave = concat_516_interleave_0, values = (concat_516_values0_0, end_step, concat_516_values2_0))[name = string("concat_516")];
+            tensor<int32, [3]> var_5057_begin_0 = const()[name = string("op_5057_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5057_end_mask_0 = const()[name = string("op_5057_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_5057_cast_fp16 = slice_by_index(begin = var_5057_begin_0, end = concat_516, end_mask = var_5057_end_mask_0, x = k_cache_93_cast_fp16)[name = string("op_5057_cast_fp16")];
+            tensor<int32, [3]> var_5060_begin_0 = const()[name = string("op_5060_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_5060_end_mask_0 = const()[name = string("op_5060_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 1024]> var_5060_cast_fp16 = slice_by_index(begin = var_5060_begin_0, end = concat_516, end_mask = var_5060_end_mask_0, x = v_cache_93_cast_fp16)[name = string("op_5060_cast_fp16")];
+            tensor<int32, [4]> concat_518x = const()[name = string("concat_518x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_5070_cast_fp16 = reshape(shape = concat_518x, x = linear_184_cast_fp16)[name = string("op_5070_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_187_cast_fp16 = mul(x = var_5070_cast_fp16, y = const_212_to_fp16)[name = string("q_187_cast_fp16")];
+            tensor<int32, [4]> concat_519x = const()[name = string("concat_519x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_5077_cast_fp16 = reshape(shape = concat_519x, x = var_5057_cast_fp16)[name = string("op_5077_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> k_235_cast_fp16 = mul(x = var_5077_cast_fp16, y = const_213_to_fp16)[name = string("k_235_cast_fp16")];
+            tensor<int32, [4]> concat_520x = const()[name = string("concat_520x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_5084_cast_fp16 = reshape(shape = concat_520x, x = var_5060_cast_fp16)[name = string("op_5084_cast_fp16")];
+            tensor<int32, [4]> var_5085 = const()[name = string("op_5085"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_139_transpose_x_0 = const()[name = string("qk_139_transpose_x_0"), val = bool(false)];
+            bool qk_139_transpose_y_0 = const()[name = string("qk_139_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_285_perm_0 = const()[name = string("transpose_285_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_286_perm_0 = const()[name = string("transpose_286_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, ?]> transpose_286 = transpose(perm = transpose_286_perm_0, x = k_235_cast_fp16)[name = string("transpose_294")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_285 = transpose(perm = transpose_285_perm_0, x = q_187_cast_fp16)[name = string("transpose_295")];
+            tensor<fp16, [1, 16, ?, ?]> qk_139_cast_fp16 = matmul(transpose_x = qk_139_transpose_x_0, transpose_y = qk_139_transpose_y_0, x = transpose_285, y = transpose_286)[name = string("qk_139_cast_fp16")];
+            int32 concat_521_values1_0 = const()[name = string("concat_521_values1_0"), val = int32(448)];
+            int32 concat_521_axis_0 = const()[name = string("concat_521_axis_0"), val = int32(0)];
+            bool concat_521_interleave_0 = const()[name = string("concat_521_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_521 = concat(axis = concat_521_axis_0, interleave = concat_521_interleave_0, values = (gather_278_cast_uint16_to_int32, concat_521_values1_0))[name = string("concat_521")];
+            tensor<int32, [2]> var_5088_begin_0 = const()[name = string("op_5088_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5088_end_mask_0 = const()[name = string("op_5088_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_5088_cast_fp16 = slice_by_index(begin = var_5088_begin_0, end = concat_521, end_mask = var_5088_end_mask_0, x = mask_to_fp16)[name = string("op_5088_cast_fp16")];
+            int32 concat_522_values0_0 = const()[name = string("concat_522_values0_0"), val = int32(0)];
+            int32 concat_522_axis_0 = const()[name = string("concat_522_axis_0"), val = int32(0)];
+            bool concat_522_interleave_0 = const()[name = string("concat_522_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_522 = concat(axis = concat_522_axis_0, interleave = concat_522_interleave_0, values = (concat_522_values0_0, gather_278_cast_uint16_to_int32))[name = string("concat_522")];
+            tensor<int32, [2]> var_5089_begin_0 = const()[name = string("op_5089_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_5089_end_mask_0 = const()[name = string("op_5089_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = concat_522, end_mask = var_5089_end_mask_0, x = var_5088_cast_fp16)[name = string("op_5089_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> qk_141_cast_fp16 = add(x = qk_139_cast_fp16, y = var_5089_cast_fp16)[name = string("qk_141_cast_fp16")];
+            tensor<fp16, [1, 16, ?, ?]> var_5092_cast_fp16 = softmax(axis = var_5001, x = qk_141_cast_fp16)[name = string("op_5092_cast_fp16")];
+            bool var_5094_transpose_x_0 = const()[name = string("op_5094_transpose_x_0"), val = bool(false)];
+            bool var_5094_transpose_y_0 = const()[name = string("op_5094_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, ?, 64]> v_235_cast_fp16 = transpose(perm = var_5085, x = var_5084_cast_fp16)[name = string("transpose_296")];
+            tensor<fp16, [1, 16, ?, 64]> var_5094_cast_fp16 = matmul(transpose_x = var_5094_transpose_x_0, transpose_y = var_5094_transpose_y_0, x = var_5092_cast_fp16, y = v_235_cast_fp16)[name = string("op_5094_cast_fp16")];
+            tensor<int32, [4]> var_5095 = const()[name = string("op_5095"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_523x = const()[name = string("concat_523x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_5096_cast_fp16 = transpose(perm = var_5095, x = var_5094_cast_fp16)[name = string("transpose_293")];
+            tensor<fp16, [1, ?, 1024]> x_421_cast_fp16 = reshape(shape = concat_523x, x = var_5096_cast_fp16)[name = string("x_421_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_5100_to_fp16 = const()[name = string("op_5100_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(792980480)))];
+            tensor<fp16, [1024]> var_5101_to_fp16 = const()[name = string("op_5101_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795077696)))];
+            tensor<fp16, [1, ?, 1024]> linear_187_cast_fp16 = linear(bias = var_5101_to_fp16, weight = var_5100_to_fp16, x = x_421_cast_fp16)[name = string("linear_187_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_423_cast_fp16 = add(x = x_417_cast_fp16, y = linear_187_cast_fp16)[name = string("x_423_cast_fp16")];
+            tensor<int32, [1]> var_5108_axes_0 = const()[name = string("op_5108_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_23_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795079808)))];
+            tensor<fp16, [1024]> blocks_23_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795081920)))];
+            tensor<fp16, [1, ?, 1024]> var_5108_cast_fp16 = layer_norm(axes = var_5108_axes_0, beta = blocks_23_cross_attn_ln_bias_to_fp16, epsilon = var_5007_to_fp16, gamma = blocks_23_cross_attn_ln_weight_to_fp16, x = x_423_cast_fp16)[name = string("op_5108_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_5117_to_fp16 = const()[name = string("op_5117_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795084032)))];
+            tensor<fp16, [1024]> var_5118_to_fp16 = const()[name = string("op_5118_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797181248)))];
+            tensor<fp16, [1, ?, 1024]> linear_188_cast_fp16 = linear(bias = var_5118_to_fp16, weight = var_5117_to_fp16, x = var_5108_cast_fp16)[name = string("linear_188_cast_fp16")];
+            tensor<int32, [3]> concat_524 = const()[name = string("concat_524"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_525 = const()[name = string("concat_525"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_237_internal_tensor_assign_1_stride_0 = const()[name = string("k_237_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> k_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_524, begin_mask = k_237_internal_tensor_assign_1_begin_mask_0, end = concat_525, end_mask = k_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_237_internal_tensor_assign_1_squeeze_mask_0, stride = k_237_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_237_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_526 = const()[name = string("concat_526"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_527 = const()[name = string("concat_527"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_237_internal_tensor_assign_1_stride_0 = const()[name = string("v_237_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 1024]> v_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_526, begin_mask = v_237_internal_tensor_assign_1_begin_mask_0, end = concat_527, end_mask = v_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_237_internal_tensor_assign_1_squeeze_mask_0, stride = v_237_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_237_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_528x = const()[name = string("concat_528x"), val = tensor<int32, [4]>([1, -1, 16, 64])];
+            tensor<fp16, [1, ?, 16, 64]> var_5138_cast_fp16 = reshape(shape = concat_528x, x = linear_188_cast_fp16)[name = string("op_5138_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 16, 64]> q_cast_fp16 = mul(x = var_5138_cast_fp16, y = const_214_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_5144 = const()[name = string("op_5144"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_5145_cast_fp16 = reshape(shape = var_5144, x = k_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5145_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_cast_fp16 = mul(x = var_5145_cast_fp16, y = const_215_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_5151 = const()[name = string("op_5151"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_5152_cast_fp16 = reshape(shape = var_5151, x = v_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5152_cast_fp16")];
+            tensor<int32, [4]> var_5153 = const()[name = string("op_5153"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_287_perm_0 = const()[name = string("transpose_287_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_288_perm_0 = const()[name = string("transpose_288_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_288 = transpose(perm = transpose_288_perm_0, x = k_cast_fp16)[name = string("transpose_290")];
+            tensor<fp16, [1, 16, ?, 64]> transpose_287 = transpose(perm = transpose_287_perm_0, x = q_cast_fp16)[name = string("transpose_291")];
+            tensor<fp16, [1, 16, ?, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_287, y = transpose_288)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 16, ?, 1500]> var_5157_cast_fp16 = softmax(axis = var_5001, x = qk_cast_fp16)[name = string("op_5157_cast_fp16")];
+            bool var_5159_transpose_x_0 = const()[name = string("op_5159_transpose_x_0"), val = bool(false)];
+            bool var_5159_transpose_y_0 = const()[name = string("op_5159_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_cast_fp16 = transpose(perm = var_5153, x = var_5152_cast_fp16)[name = string("transpose_292")];
+            tensor<fp16, [1, 16, ?, 64]> var_5159_cast_fp16 = matmul(transpose_x = var_5159_transpose_x_0, transpose_y = var_5159_transpose_y_0, x = var_5157_cast_fp16, y = v_cast_fp16)[name = string("op_5159_cast_fp16")];
+            tensor<int32, [4]> var_5160 = const()[name = string("op_5160"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_529x = const()[name = string("concat_529x"), val = tensor<int32, [3]>([1, -1, 1024])];
+            tensor<fp16, [1, ?, 16, 64]> var_5161_cast_fp16 = transpose(perm = var_5160, x = var_5159_cast_fp16)[name = string("transpose_289")];
+            tensor<fp16, [1, ?, 1024]> x_427_cast_fp16 = reshape(shape = concat_529x, x = var_5161_cast_fp16)[name = string("x_427_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797183360)))];
+            tensor<fp16, [1024]> var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799280576)))];
+            tensor<fp16, [1, ?, 1024]> linear_189_cast_fp16 = linear(bias = var_5166_to_fp16, weight = var_5165_to_fp16, x = x_427_cast_fp16)[name = string("linear_189_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_429_cast_fp16 = add(x = x_423_cast_fp16, y = linear_189_cast_fp16)[name = string("x_429_cast_fp16")];
+            tensor<int32, [1]> var_5173_axes_0 = const()[name = string("op_5173_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799282688)))];
+            tensor<fp16, [1024]> blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799284800)))];
+            tensor<fp16, [1, ?, 1024]> var_5173_cast_fp16 = layer_norm(axes = var_5173_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_5007_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_429_cast_fp16)[name = string("op_5173_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_5182_to_fp16 = const()[name = string("op_5182_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799286912)))];
+            tensor<fp16, [4096]> var_5183_to_fp16 = const()[name = string("op_5183_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807675584)))];
+            tensor<fp16, [1, ?, 4096]> linear_190_cast_fp16 = linear(bias = var_5183_to_fp16, weight = var_5182_to_fp16, x = var_5173_cast_fp16)[name = string("linear_190_cast_fp16")];
+            string x_433_mode_0 = const()[name = string("x_433_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 4096]> x_433_cast_fp16 = gelu(mode = x_433_mode_0, x = linear_190_cast_fp16)[name = string("x_433_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_5188_to_fp16 = const()[name = string("op_5188_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807683840)))];
+            tensor<fp16, [1024]> var_5189_to_fp16 = const()[name = string("op_5189_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816072512)))];
+            tensor<fp16, [1, ?, 1024]> linear_191_cast_fp16 = linear(bias = var_5189_to_fp16, weight = var_5188_to_fp16, x = x_433_cast_fp16)[name = string("linear_191_cast_fp16")];
+            tensor<fp16, [1, ?, 1024]> x_435_cast_fp16 = add(x = x_429_cast_fp16, y = linear_191_cast_fp16)[name = string("x_435_cast_fp16")];
+            tensor<int32, [1]> var_5202_axes_0 = const()[name = string("op_5202_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816074624)))];
+            tensor<fp16, [1024]> ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816076736)))];
+            fp16 var_5193_to_fp16 = const()[name = string("op_5193_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 1024]> var_5202_cast_fp16 = layer_norm(axes = var_5202_axes_0, beta = ln_bias_to_fp16, epsilon = var_5193_to_fp16, gamma = ln_weight_to_fp16, x = x_435_cast_fp16)[name = string("op_5202_cast_fp16")];
+            tensor<fp16, [51865]> var_5212_bias_0_to_fp16 = const()[name = string("op_5212_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816078848)))];
+            tensor<fp16, [1, ?, 51865]> logits = linear(bias = var_5212_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_5202_cast_fp16)[name = string("op_5212_cast_fp16")];
+        } -> (logits);
+}
\ No newline at end of file
diff --git a/medium/decoder_second.mlmodelc/weights/weight.bin b/medium/decoder_second.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fba75e31b9ef54e62e5968cda2fb1ab402230dc4
--- /dev/null
+++ b/medium/decoder_second.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b39ed5d8c3a6ea265389ae0514446dd9fd1a2e2d1fa05ca312ba7f5c191c919
+size 816182642
diff --git a/medium/encoder.mlmodelc/analytics/coremldata.bin b/medium/encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ae03493199bfac3df6651194ca75cd8949716035
--- /dev/null
+++ b/medium/encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e46013df49c631abb62ecdde56db1b2578bef5f436747d44f5ae8e1c7ebcfdb
+size 243
diff --git a/medium/encoder.mlmodelc/coremldata.bin b/medium/encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0a064fdd37da14988fa5735f312090bd26b3790e
--- /dev/null
+++ b/medium/encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:392253a90aa1cec92d5a6840d45c13ddeea2838456ad13320ac360d2bf0ca4d7
+size 318
diff --git a/medium/encoder.mlmodelc/metadata.json b/medium/encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..d22ccf5ba9572014832497aa51efd010de020d43
--- /dev/null
+++ b/medium/encoder.mlmodelc/metadata.json
@@ -0,0 +1,69 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 1024]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.mul" : 48,
+      "Ios18.softmax" : 24,
+      "Ios18.linear" : 144,
+      "Ios18.gelu" : 26,
+      "Ios18.layerNorm" : 49,
+      "Ios18.transpose" : 97,
+      "Ios18.matmul" : 48,
+      "Ios18.conv" : 2,
+      "Ios18.add" : 49,
+      "Ios18.reshape" : 96
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "encoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/medium/encoder.mlmodelc/model.mil b/medium/encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..759bcad72cf1912b69db25ce5506b969631ff645
--- /dev/null
+++ b/medium/encoder.mlmodelc/model.mil
@@ -0,0 +1,1428 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 80, 3000]> logmel_data) {
+            string var_68_pad_type_0 = const()[name = string("op_68_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_68_pad_0 = const()[name = string("op_68_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_68_strides_0 = const()[name = string("op_68_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_68_dilations_0 = const()[name = string("op_68_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_68_groups_0 = const()[name = string("op_68_groups_0"), val = int32(1)];
+            tensor<fp16, [1024, 80, 3]> weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor<fp16, [1024, 80, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1024]> bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491648)))];
+            tensor<fp16, [1, 1024, 3000]> var_68_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_68_dilations_0, groups = var_68_groups_0, pad = var_68_pad_0, pad_type = var_68_pad_type_0, strides = var_68_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_68_cast_fp16")];
+            string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1024, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_68_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_86_pad_type_0 = const()[name = string("op_86_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_86_pad_0 = const()[name = string("op_86_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_86_strides_0 = const()[name = string("op_86_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_86_dilations_0 = const()[name = string("op_86_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_86_groups_0 = const()[name = string("op_86_groups_0"), val = int32(1)];
+            tensor<fp16, [1024, 1024, 3]> weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor<fp16, [1024, 1024, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493760)))];
+            tensor<fp16, [1024]> bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6785280)))];
+            tensor<fp16, [1, 1024, 1500]> var_86_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_86_dilations_0, groups = var_86_groups_0, pad = var_86_pad_0, pad_type = var_86_pad_type_0, strides = var_86_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_86_cast_fp16")];
+            string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1024, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_86_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<int32, [3]> var_92 = const()[name = string("op_92"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [1500, 1024]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [1500, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6787392)))];
+            tensor<fp16, [1, 1500, 1024]> x_5_cast_fp16 = transpose(perm = var_92, x = x_3_cast_fp16)[name = string("transpose_240")];
+            tensor<fp16, [1, 1500, 1024]> var_95_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_95_cast_fp16")];
+            int32 var_108 = const()[name = string("op_108"), val = int32(-1)];
+            tensor<int32, [1]> var_124_axes_0 = const()[name = string("op_124_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9859456)))];
+            tensor<fp16, [1024]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9861568)))];
+            fp16 var_114_to_fp16 = const()[name = string("op_114_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_124_cast_fp16 = layer_norm(axes = var_124_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_114_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_95_cast_fp16)[name = string("op_124_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_135_to_fp16 = const()[name = string("op_135_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9863680)))];
+            tensor<fp16, [1024]> var_136_to_fp16 = const()[name = string("op_136_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11960896)))];
+            tensor<fp16, [1, 1500, 1024]> linear_0_cast_fp16 = linear(bias = var_136_to_fp16, weight = var_135_to_fp16, x = var_124_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_139_to_fp16 = const()[name = string("op_139_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11963008)))];
+            tensor<fp16, [1024]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14060224)))];
+            tensor<fp16, [1, 1500, 1024]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_139_to_fp16, x = var_124_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_143_to_fp16 = const()[name = string("op_143_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14062336)))];
+            tensor<fp16, [1024]> var_144_to_fp16 = const()[name = string("op_144_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16159552)))];
+            tensor<fp16, [1, 1500, 1024]> linear_2_cast_fp16 = linear(bias = var_144_to_fp16, weight = var_143_to_fp16, x = var_124_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_152 = const()[name = string("op_152"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_153_cast_fp16 = reshape(shape = var_152, x = linear_0_cast_fp16)[name = string("op_153_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_3_cast_fp16 = mul(x = var_153_cast_fp16, y = const_168_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> var_159 = const()[name = string("op_159"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_160_cast_fp16 = reshape(shape = var_159, x = linear_1_cast_fp16)[name = string("op_160_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_3_cast_fp16 = mul(x = var_160_cast_fp16, y = const_169_to_fp16)[name = string("k_3_cast_fp16")];
+            tensor<int32, [4]> var_166 = const()[name = string("op_166"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_167_cast_fp16 = reshape(shape = var_166, x = linear_2_cast_fp16)[name = string("op_167_cast_fp16")];
+            tensor<int32, [4]> var_168 = const()[name = string("op_168"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_97 = transpose(perm = transpose_97_perm_0, x = k_3_cast_fp16)[name = string("transpose_237")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = q_3_cast_fp16)[name = string("transpose_238")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_96, y = transpose_97)[name = string("qk_1_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_172_cast_fp16 = softmax(axis = var_108, x = qk_1_cast_fp16)[name = string("op_172_cast_fp16")];
+            bool var_174_transpose_x_0 = const()[name = string("op_174_transpose_x_0"), val = bool(false)];
+            bool var_174_transpose_y_0 = const()[name = string("op_174_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_3_cast_fp16 = transpose(perm = var_168, x = var_167_cast_fp16)[name = string("transpose_239")];
+            tensor<fp16, [1, 16, 1500, 64]> var_174_cast_fp16 = matmul(transpose_x = var_174_transpose_x_0, transpose_y = var_174_transpose_y_0, x = var_172_cast_fp16, y = v_3_cast_fp16)[name = string("op_174_cast_fp16")];
+            tensor<int32, [4]> var_175 = const()[name = string("op_175"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_176_cast_fp16 = transpose(perm = var_175, x = var_174_cast_fp16)[name = string("transpose_236")];
+            tensor<fp16, [1, 1500, 1024]> x_11_cast_fp16 = reshape(shape = concat_0, x = var_176_cast_fp16)[name = string("x_11_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_180_to_fp16 = const()[name = string("op_180_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16161664)))];
+            tensor<fp16, [1024]> var_181_to_fp16 = const()[name = string("op_181_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18258880)))];
+            tensor<fp16, [1, 1500, 1024]> linear_3_cast_fp16 = linear(bias = var_181_to_fp16, weight = var_180_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_13_cast_fp16 = add(x = var_95_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<int32, [1]> var_188_axes_0 = const()[name = string("op_188_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18260992)))];
+            tensor<fp16, [1024]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18263104)))];
+            tensor<fp16, [1, 1500, 1024]> var_188_cast_fp16 = layer_norm(axes = var_188_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_114_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_188_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_197_to_fp16 = const()[name = string("op_197_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18265216)))];
+            tensor<fp16, [4096]> var_198_to_fp16 = const()[name = string("op_198_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26653888)))];
+            tensor<fp16, [1, 1500, 4096]> linear_4_cast_fp16 = linear(bias = var_198_to_fp16, weight = var_197_to_fp16, x = var_188_cast_fp16)[name = string("linear_4_cast_fp16")];
+            string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_203_to_fp16 = const()[name = string("op_203_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26662144)))];
+            tensor<fp16, [1024]> var_204_to_fp16 = const()[name = string("op_204_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35050816)))];
+            tensor<fp16, [1, 1500, 1024]> linear_5_cast_fp16 = linear(bias = var_204_to_fp16, weight = var_203_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")];
+            int32 var_214 = const()[name = string("op_214"), val = int32(-1)];
+            tensor<int32, [1]> var_230_axes_0 = const()[name = string("op_230_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35052928)))];
+            tensor<fp16, [1024]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35055040)))];
+            fp16 var_220_to_fp16 = const()[name = string("op_220_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_230_cast_fp16 = layer_norm(axes = var_230_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_220_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_230_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_241_to_fp16 = const()[name = string("op_241_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35057152)))];
+            tensor<fp16, [1024]> var_242_to_fp16 = const()[name = string("op_242_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37154368)))];
+            tensor<fp16, [1, 1500, 1024]> linear_6_cast_fp16 = linear(bias = var_242_to_fp16, weight = var_241_to_fp16, x = var_230_cast_fp16)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_245_to_fp16 = const()[name = string("op_245_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37156480)))];
+            tensor<fp16, [1, 1500, 1024]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_245_to_fp16, x = var_230_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_249_to_fp16 = const()[name = string("op_249_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39253696)))];
+            tensor<fp16, [1024]> var_250_to_fp16 = const()[name = string("op_250_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41350912)))];
+            tensor<fp16, [1, 1500, 1024]> linear_8_cast_fp16 = linear(bias = var_250_to_fp16, weight = var_249_to_fp16, x = var_230_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_258 = const()[name = string("op_258"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_259_cast_fp16 = reshape(shape = var_258, x = linear_6_cast_fp16)[name = string("op_259_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_7_cast_fp16 = mul(x = var_259_cast_fp16, y = const_170_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_265 = const()[name = string("op_265"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_266_cast_fp16 = reshape(shape = var_265, x = linear_7_cast_fp16)[name = string("op_266_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_7_cast_fp16 = mul(x = var_266_cast_fp16, y = const_171_to_fp16)[name = string("k_7_cast_fp16")];
+            tensor<int32, [4]> var_272 = const()[name = string("op_272"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_273_cast_fp16 = reshape(shape = var_272, x = linear_8_cast_fp16)[name = string("op_273_cast_fp16")];
+            tensor<int32, [4]> var_274 = const()[name = string("op_274"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)];
+            bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_99 = transpose(perm = transpose_99_perm_0, x = k_7_cast_fp16)[name = string("transpose_233")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_98 = transpose(perm = transpose_98_perm_0, x = q_7_cast_fp16)[name = string("transpose_234")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_98, y = transpose_99)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_278_cast_fp16 = softmax(axis = var_214, x = qk_3_cast_fp16)[name = string("op_278_cast_fp16")];
+            bool var_280_transpose_x_0 = const()[name = string("op_280_transpose_x_0"), val = bool(false)];
+            bool var_280_transpose_y_0 = const()[name = string("op_280_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_7_cast_fp16 = transpose(perm = var_274, x = var_273_cast_fp16)[name = string("transpose_235")];
+            tensor<fp16, [1, 16, 1500, 64]> var_280_cast_fp16 = matmul(transpose_x = var_280_transpose_x_0, transpose_y = var_280_transpose_y_0, x = var_278_cast_fp16, y = v_7_cast_fp16)[name = string("op_280_cast_fp16")];
+            tensor<int32, [4]> var_281 = const()[name = string("op_281"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_282_cast_fp16 = transpose(perm = var_281, x = var_280_cast_fp16)[name = string("transpose_232")];
+            tensor<fp16, [1, 1500, 1024]> x_23_cast_fp16 = reshape(shape = concat_1, x = var_282_cast_fp16)[name = string("x_23_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_286_to_fp16 = const()[name = string("op_286_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41353024)))];
+            tensor<fp16, [1024]> var_287_to_fp16 = const()[name = string("op_287_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43450240)))];
+            tensor<fp16, [1, 1500, 1024]> linear_9_cast_fp16 = linear(bias = var_287_to_fp16, weight = var_286_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<int32, [1]> var_294_axes_0 = const()[name = string("op_294_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43452352)))];
+            tensor<fp16, [1024]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43454464)))];
+            tensor<fp16, [1, 1500, 1024]> var_294_cast_fp16 = layer_norm(axes = var_294_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_220_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_294_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_303_to_fp16 = const()[name = string("op_303_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43456576)))];
+            tensor<fp16, [4096]> var_304_to_fp16 = const()[name = string("op_304_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51845248)))];
+            tensor<fp16, [1, 1500, 4096]> linear_10_cast_fp16 = linear(bias = var_304_to_fp16, weight = var_303_to_fp16, x = var_294_cast_fp16)[name = string("linear_10_cast_fp16")];
+            string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_309_to_fp16 = const()[name = string("op_309_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51853504)))];
+            tensor<fp16, [1024]> var_310_to_fp16 = const()[name = string("op_310_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60242176)))];
+            tensor<fp16, [1, 1500, 1024]> linear_11_cast_fp16 = linear(bias = var_310_to_fp16, weight = var_309_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")];
+            int32 var_320 = const()[name = string("op_320"), val = int32(-1)];
+            tensor<int32, [1]> var_336_axes_0 = const()[name = string("op_336_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60244288)))];
+            tensor<fp16, [1024]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60246400)))];
+            fp16 var_326_to_fp16 = const()[name = string("op_326_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_336_cast_fp16 = layer_norm(axes = var_336_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_326_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_336_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_347_to_fp16 = const()[name = string("op_347_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60248512)))];
+            tensor<fp16, [1024]> var_348_to_fp16 = const()[name = string("op_348_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62345728)))];
+            tensor<fp16, [1, 1500, 1024]> linear_12_cast_fp16 = linear(bias = var_348_to_fp16, weight = var_347_to_fp16, x = var_336_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_351_to_fp16 = const()[name = string("op_351_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62347840)))];
+            tensor<fp16, [1, 1500, 1024]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_351_to_fp16, x = var_336_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_355_to_fp16 = const()[name = string("op_355_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64445056)))];
+            tensor<fp16, [1024]> var_356_to_fp16 = const()[name = string("op_356_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66542272)))];
+            tensor<fp16, [1, 1500, 1024]> linear_14_cast_fp16 = linear(bias = var_356_to_fp16, weight = var_355_to_fp16, x = var_336_cast_fp16)[name = string("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_364 = const()[name = string("op_364"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_365_cast_fp16 = reshape(shape = var_364, x = linear_12_cast_fp16)[name = string("op_365_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_11_cast_fp16 = mul(x = var_365_cast_fp16, y = const_172_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> var_371 = const()[name = string("op_371"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_372_cast_fp16 = reshape(shape = var_371, x = linear_13_cast_fp16)[name = string("op_372_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_11_cast_fp16 = mul(x = var_372_cast_fp16, y = const_173_to_fp16)[name = string("k_11_cast_fp16")];
+            tensor<int32, [4]> var_378 = const()[name = string("op_378"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_379_cast_fp16 = reshape(shape = var_378, x = linear_14_cast_fp16)[name = string("op_379_cast_fp16")];
+            tensor<int32, [4]> var_380 = const()[name = string("op_380"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_101 = transpose(perm = transpose_101_perm_0, x = k_11_cast_fp16)[name = string("transpose_229")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = q_11_cast_fp16)[name = string("transpose_230")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_100, y = transpose_101)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_384_cast_fp16 = softmax(axis = var_320, x = qk_5_cast_fp16)[name = string("op_384_cast_fp16")];
+            bool var_386_transpose_x_0 = const()[name = string("op_386_transpose_x_0"), val = bool(false)];
+            bool var_386_transpose_y_0 = const()[name = string("op_386_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_11_cast_fp16 = transpose(perm = var_380, x = var_379_cast_fp16)[name = string("transpose_231")];
+            tensor<fp16, [1, 16, 1500, 64]> var_386_cast_fp16 = matmul(transpose_x = var_386_transpose_x_0, transpose_y = var_386_transpose_y_0, x = var_384_cast_fp16, y = v_11_cast_fp16)[name = string("op_386_cast_fp16")];
+            tensor<int32, [4]> var_387 = const()[name = string("op_387"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_388_cast_fp16 = transpose(perm = var_387, x = var_386_cast_fp16)[name = string("transpose_228")];
+            tensor<fp16, [1, 1500, 1024]> x_35_cast_fp16 = reshape(shape = concat_2, x = var_388_cast_fp16)[name = string("x_35_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_392_to_fp16 = const()[name = string("op_392_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66544384)))];
+            tensor<fp16, [1024]> var_393_to_fp16 = const()[name = string("op_393_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68641600)))];
+            tensor<fp16, [1, 1500, 1024]> linear_15_cast_fp16 = linear(bias = var_393_to_fp16, weight = var_392_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<int32, [1]> var_400_axes_0 = const()[name = string("op_400_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68643712)))];
+            tensor<fp16, [1024]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68645824)))];
+            tensor<fp16, [1, 1500, 1024]> var_400_cast_fp16 = layer_norm(axes = var_400_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_326_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_400_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_409_to_fp16 = const()[name = string("op_409_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68647936)))];
+            tensor<fp16, [4096]> var_410_to_fp16 = const()[name = string("op_410_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77036608)))];
+            tensor<fp16, [1, 1500, 4096]> linear_16_cast_fp16 = linear(bias = var_410_to_fp16, weight = var_409_to_fp16, x = var_400_cast_fp16)[name = string("linear_16_cast_fp16")];
+            string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_415_to_fp16 = const()[name = string("op_415_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77044864)))];
+            tensor<fp16, [1024]> var_416_to_fp16 = const()[name = string("op_416_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85433536)))];
+            tensor<fp16, [1, 1500, 1024]> linear_17_cast_fp16 = linear(bias = var_416_to_fp16, weight = var_415_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")];
+            int32 var_426 = const()[name = string("op_426"), val = int32(-1)];
+            tensor<int32, [1]> var_442_axes_0 = const()[name = string("op_442_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85435648)))];
+            tensor<fp16, [1024]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85437760)))];
+            fp16 var_432_to_fp16 = const()[name = string("op_432_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_442_cast_fp16 = layer_norm(axes = var_442_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_432_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_442_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_453_to_fp16 = const()[name = string("op_453_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85439872)))];
+            tensor<fp16, [1024]> var_454_to_fp16 = const()[name = string("op_454_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87537088)))];
+            tensor<fp16, [1, 1500, 1024]> linear_18_cast_fp16 = linear(bias = var_454_to_fp16, weight = var_453_to_fp16, x = var_442_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_457_to_fp16 = const()[name = string("op_457_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87539200)))];
+            tensor<fp16, [1, 1500, 1024]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_457_to_fp16, x = var_442_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_461_to_fp16 = const()[name = string("op_461_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89636416)))];
+            tensor<fp16, [1024]> var_462_to_fp16 = const()[name = string("op_462_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91733632)))];
+            tensor<fp16, [1, 1500, 1024]> linear_20_cast_fp16 = linear(bias = var_462_to_fp16, weight = var_461_to_fp16, x = var_442_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_470 = const()[name = string("op_470"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_471_cast_fp16 = reshape(shape = var_470, x = linear_18_cast_fp16)[name = string("op_471_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_15_cast_fp16 = mul(x = var_471_cast_fp16, y = const_174_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_477 = const()[name = string("op_477"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_478_cast_fp16 = reshape(shape = var_477, x = linear_19_cast_fp16)[name = string("op_478_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_15_cast_fp16 = mul(x = var_478_cast_fp16, y = const_175_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> var_484 = const()[name = string("op_484"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_485_cast_fp16 = reshape(shape = var_484, x = linear_20_cast_fp16)[name = string("op_485_cast_fp16")];
+            tensor<int32, [4]> var_486 = const()[name = string("op_486"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_103 = transpose(perm = transpose_103_perm_0, x = k_15_cast_fp16)[name = string("transpose_225")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_102 = transpose(perm = transpose_102_perm_0, x = q_15_cast_fp16)[name = string("transpose_226")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_102, y = transpose_103)[name = string("qk_7_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_490_cast_fp16 = softmax(axis = var_426, x = qk_7_cast_fp16)[name = string("op_490_cast_fp16")];
+            bool var_492_transpose_x_0 = const()[name = string("op_492_transpose_x_0"), val = bool(false)];
+            bool var_492_transpose_y_0 = const()[name = string("op_492_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_15_cast_fp16 = transpose(perm = var_486, x = var_485_cast_fp16)[name = string("transpose_227")];
+            tensor<fp16, [1, 16, 1500, 64]> var_492_cast_fp16 = matmul(transpose_x = var_492_transpose_x_0, transpose_y = var_492_transpose_y_0, x = var_490_cast_fp16, y = v_15_cast_fp16)[name = string("op_492_cast_fp16")];
+            tensor<int32, [4]> var_493 = const()[name = string("op_493"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_494_cast_fp16 = transpose(perm = var_493, x = var_492_cast_fp16)[name = string("transpose_224")];
+            tensor<fp16, [1, 1500, 1024]> x_47_cast_fp16 = reshape(shape = concat_3, x = var_494_cast_fp16)[name = string("x_47_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_498_to_fp16 = const()[name = string("op_498_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91735744)))];
+            tensor<fp16, [1024]> var_499_to_fp16 = const()[name = string("op_499_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93832960)))];
+            tensor<fp16, [1, 1500, 1024]> linear_21_cast_fp16 = linear(bias = var_499_to_fp16, weight = var_498_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<int32, [1]> var_506_axes_0 = const()[name = string("op_506_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93835072)))];
+            tensor<fp16, [1024]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93837184)))];
+            tensor<fp16, [1, 1500, 1024]> var_506_cast_fp16 = layer_norm(axes = var_506_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_432_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_506_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93839296)))];
+            tensor<fp16, [4096]> var_516_to_fp16 = const()[name = string("op_516_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102227968)))];
+            tensor<fp16, [1, 1500, 4096]> linear_22_cast_fp16 = linear(bias = var_516_to_fp16, weight = var_515_to_fp16, x = var_506_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_521_to_fp16 = const()[name = string("op_521_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102236224)))];
+            tensor<fp16, [1024]> var_522_to_fp16 = const()[name = string("op_522_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110624896)))];
+            tensor<fp16, [1, 1500, 1024]> linear_23_cast_fp16 = linear(bias = var_522_to_fp16, weight = var_521_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")];
+            int32 var_532 = const()[name = string("op_532"), val = int32(-1)];
+            tensor<int32, [1]> var_548_axes_0 = const()[name = string("op_548_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110627008)))];
+            tensor<fp16, [1024]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110629120)))];
+            fp16 var_538_to_fp16 = const()[name = string("op_538_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_548_cast_fp16 = layer_norm(axes = var_548_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_538_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_548_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110631232)))];
+            tensor<fp16, [1024]> var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112728448)))];
+            tensor<fp16, [1, 1500, 1024]> linear_24_cast_fp16 = linear(bias = var_560_to_fp16, weight = var_559_to_fp16, x = var_548_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_563_to_fp16 = const()[name = string("op_563_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112730560)))];
+            tensor<fp16, [1, 1500, 1024]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_563_to_fp16, x = var_548_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_567_to_fp16 = const()[name = string("op_567_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114827776)))];
+            tensor<fp16, [1024]> var_568_to_fp16 = const()[name = string("op_568_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116924992)))];
+            tensor<fp16, [1, 1500, 1024]> linear_26_cast_fp16 = linear(bias = var_568_to_fp16, weight = var_567_to_fp16, x = var_548_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [4]> var_576 = const()[name = string("op_576"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_577_cast_fp16 = reshape(shape = var_576, x = linear_24_cast_fp16)[name = string("op_577_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_19_cast_fp16 = mul(x = var_577_cast_fp16, y = const_176_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> var_583 = const()[name = string("op_583"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_584_cast_fp16 = reshape(shape = var_583, x = linear_25_cast_fp16)[name = string("op_584_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_19_cast_fp16 = mul(x = var_584_cast_fp16, y = const_177_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_590 = const()[name = string("op_590"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_591_cast_fp16 = reshape(shape = var_590, x = linear_26_cast_fp16)[name = string("op_591_cast_fp16")];
+            tensor<int32, [4]> var_592 = const()[name = string("op_592"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)];
+            bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_105 = transpose(perm = transpose_105_perm_0, x = k_19_cast_fp16)[name = string("transpose_221")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = q_19_cast_fp16)[name = string("transpose_222")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_104, y = transpose_105)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_596_cast_fp16 = softmax(axis = var_532, x = qk_9_cast_fp16)[name = string("op_596_cast_fp16")];
+            bool var_598_transpose_x_0 = const()[name = string("op_598_transpose_x_0"), val = bool(false)];
+            bool var_598_transpose_y_0 = const()[name = string("op_598_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_592, x = var_591_cast_fp16)[name = string("transpose_223")];
+            tensor<fp16, [1, 16, 1500, 64]> var_598_cast_fp16 = matmul(transpose_x = var_598_transpose_x_0, transpose_y = var_598_transpose_y_0, x = var_596_cast_fp16, y = v_19_cast_fp16)[name = string("op_598_cast_fp16")];
+            tensor<int32, [4]> var_599 = const()[name = string("op_599"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_4 = const()[name = string("concat_4"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_600_cast_fp16 = transpose(perm = var_599, x = var_598_cast_fp16)[name = string("transpose_220")];
+            tensor<fp16, [1, 1500, 1024]> x_59_cast_fp16 = reshape(shape = concat_4, x = var_600_cast_fp16)[name = string("x_59_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_604_to_fp16 = const()[name = string("op_604_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116927104)))];
+            tensor<fp16, [1024]> var_605_to_fp16 = const()[name = string("op_605_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119024320)))];
+            tensor<fp16, [1, 1500, 1024]> linear_27_cast_fp16 = linear(bias = var_605_to_fp16, weight = var_604_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<int32, [1]> var_612_axes_0 = const()[name = string("op_612_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119026432)))];
+            tensor<fp16, [1024]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119028544)))];
+            tensor<fp16, [1, 1500, 1024]> var_612_cast_fp16 = layer_norm(axes = var_612_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_538_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_612_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_621_to_fp16 = const()[name = string("op_621_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119030656)))];
+            tensor<fp16, [4096]> var_622_to_fp16 = const()[name = string("op_622_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127419328)))];
+            tensor<fp16, [1, 1500, 4096]> linear_28_cast_fp16 = linear(bias = var_622_to_fp16, weight = var_621_to_fp16, x = var_612_cast_fp16)[name = string("linear_28_cast_fp16")];
+            string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_627_to_fp16 = const()[name = string("op_627_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127427584)))];
+            tensor<fp16, [1024]> var_628_to_fp16 = const()[name = string("op_628_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135816256)))];
+            tensor<fp16, [1, 1500, 1024]> linear_29_cast_fp16 = linear(bias = var_628_to_fp16, weight = var_627_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")];
+            int32 var_638 = const()[name = string("op_638"), val = int32(-1)];
+            tensor<int32, [1]> var_654_axes_0 = const()[name = string("op_654_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135818368)))];
+            tensor<fp16, [1024]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135820480)))];
+            fp16 var_644_to_fp16 = const()[name = string("op_644_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_654_cast_fp16 = layer_norm(axes = var_654_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_644_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_654_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_665_to_fp16 = const()[name = string("op_665_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135822592)))];
+            tensor<fp16, [1024]> var_666_to_fp16 = const()[name = string("op_666_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137919808)))];
+            tensor<fp16, [1, 1500, 1024]> linear_30_cast_fp16 = linear(bias = var_666_to_fp16, weight = var_665_to_fp16, x = var_654_cast_fp16)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137921920)))];
+            tensor<fp16, [1, 1500, 1024]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_669_to_fp16, x = var_654_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_673_to_fp16 = const()[name = string("op_673_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140019136)))];
+            tensor<fp16, [1024]> var_674_to_fp16 = const()[name = string("op_674_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142116352)))];
+            tensor<fp16, [1, 1500, 1024]> linear_32_cast_fp16 = linear(bias = var_674_to_fp16, weight = var_673_to_fp16, x = var_654_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<int32, [4]> var_682 = const()[name = string("op_682"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_683_cast_fp16 = reshape(shape = var_682, x = linear_30_cast_fp16)[name = string("op_683_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_23_cast_fp16 = mul(x = var_683_cast_fp16, y = const_178_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_689 = const()[name = string("op_689"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_690_cast_fp16 = reshape(shape = var_689, x = linear_31_cast_fp16)[name = string("op_690_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_23_cast_fp16 = mul(x = var_690_cast_fp16, y = const_179_to_fp16)[name = string("k_23_cast_fp16")];
+            tensor<int32, [4]> var_696 = const()[name = string("op_696"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_697_cast_fp16 = reshape(shape = var_696, x = linear_32_cast_fp16)[name = string("op_697_cast_fp16")];
+            tensor<int32, [4]> var_698 = const()[name = string("op_698"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_107 = transpose(perm = transpose_107_perm_0, x = k_23_cast_fp16)[name = string("transpose_217")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_106 = transpose(perm = transpose_106_perm_0, x = q_23_cast_fp16)[name = string("transpose_218")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_106, y = transpose_107)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_702_cast_fp16 = softmax(axis = var_638, x = qk_11_cast_fp16)[name = string("op_702_cast_fp16")];
+            bool var_704_transpose_x_0 = const()[name = string("op_704_transpose_x_0"), val = bool(false)];
+            bool var_704_transpose_y_0 = const()[name = string("op_704_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_23_cast_fp16 = transpose(perm = var_698, x = var_697_cast_fp16)[name = string("transpose_219")];
+            tensor<fp16, [1, 16, 1500, 64]> var_704_cast_fp16 = matmul(transpose_x = var_704_transpose_x_0, transpose_y = var_704_transpose_y_0, x = var_702_cast_fp16, y = v_23_cast_fp16)[name = string("op_704_cast_fp16")];
+            tensor<int32, [4]> var_705 = const()[name = string("op_705"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_706_cast_fp16 = transpose(perm = var_705, x = var_704_cast_fp16)[name = string("transpose_216")];
+            tensor<fp16, [1, 1500, 1024]> x_71_cast_fp16 = reshape(shape = concat_5, x = var_706_cast_fp16)[name = string("x_71_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_710_to_fp16 = const()[name = string("op_710_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142118464)))];
+            tensor<fp16, [1024]> var_711_to_fp16 = const()[name = string("op_711_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144215680)))];
+            tensor<fp16, [1, 1500, 1024]> linear_33_cast_fp16 = linear(bias = var_711_to_fp16, weight = var_710_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<int32, [1]> var_718_axes_0 = const()[name = string("op_718_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144217792)))];
+            tensor<fp16, [1024]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144219904)))];
+            tensor<fp16, [1, 1500, 1024]> var_718_cast_fp16 = layer_norm(axes = var_718_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_644_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_718_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144222016)))];
+            tensor<fp16, [4096]> var_728_to_fp16 = const()[name = string("op_728_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152610688)))];
+            tensor<fp16, [1, 1500, 4096]> linear_34_cast_fp16 = linear(bias = var_728_to_fp16, weight = var_727_to_fp16, x = var_718_cast_fp16)[name = string("linear_34_cast_fp16")];
+            string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_733_to_fp16 = const()[name = string("op_733_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152618944)))];
+            tensor<fp16, [1024]> var_734_to_fp16 = const()[name = string("op_734_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161007616)))];
+            tensor<fp16, [1, 1500, 1024]> linear_35_cast_fp16 = linear(bias = var_734_to_fp16, weight = var_733_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_79_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_79_cast_fp16")];
+            int32 var_744 = const()[name = string("op_744"), val = int32(-1)];
+            tensor<int32, [1]> var_760_axes_0 = const()[name = string("op_760_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161009728)))];
+            tensor<fp16, [1024]> blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161011840)))];
+            fp16 var_750_to_fp16 = const()[name = string("op_750_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_760_cast_fp16 = layer_norm(axes = var_760_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_750_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast_fp16)[name = string("op_760_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_771_to_fp16 = const()[name = string("op_771_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161013952)))];
+            tensor<fp16, [1024]> var_772_to_fp16 = const()[name = string("op_772_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163111168)))];
+            tensor<fp16, [1, 1500, 1024]> linear_36_cast_fp16 = linear(bias = var_772_to_fp16, weight = var_771_to_fp16, x = var_760_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_775_to_fp16 = const()[name = string("op_775_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163113280)))];
+            tensor<fp16, [1, 1500, 1024]> linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_775_to_fp16, x = var_760_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_779_to_fp16 = const()[name = string("op_779_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165210496)))];
+            tensor<fp16, [1024]> var_780_to_fp16 = const()[name = string("op_780_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167307712)))];
+            tensor<fp16, [1, 1500, 1024]> linear_38_cast_fp16 = linear(bias = var_780_to_fp16, weight = var_779_to_fp16, x = var_760_cast_fp16)[name = string("linear_38_cast_fp16")];
+            tensor<int32, [4]> var_788 = const()[name = string("op_788"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_789_cast_fp16 = reshape(shape = var_788, x = linear_36_cast_fp16)[name = string("op_789_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_27_cast_fp16 = mul(x = var_789_cast_fp16, y = const_180_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> var_795 = const()[name = string("op_795"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_796_cast_fp16 = reshape(shape = var_795, x = linear_37_cast_fp16)[name = string("op_796_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_27_cast_fp16 = mul(x = var_796_cast_fp16, y = const_181_to_fp16)[name = string("k_27_cast_fp16")];
+            tensor<int32, [4]> var_802 = const()[name = string("op_802"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_803_cast_fp16 = reshape(shape = var_802, x = linear_38_cast_fp16)[name = string("op_803_cast_fp16")];
+            tensor<int32, [4]> var_804 = const()[name = string("op_804"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_108_perm_0 = const()[name = string("transpose_108_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_109_perm_0 = const()[name = string("transpose_109_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_109 = transpose(perm = transpose_109_perm_0, x = k_27_cast_fp16)[name = string("transpose_213")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_108 = transpose(perm = transpose_108_perm_0, x = q_27_cast_fp16)[name = string("transpose_214")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_108, y = transpose_109)[name = string("qk_13_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_808_cast_fp16 = softmax(axis = var_744, x = qk_13_cast_fp16)[name = string("op_808_cast_fp16")];
+            bool var_810_transpose_x_0 = const()[name = string("op_810_transpose_x_0"), val = bool(false)];
+            bool var_810_transpose_y_0 = const()[name = string("op_810_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_27_cast_fp16 = transpose(perm = var_804, x = var_803_cast_fp16)[name = string("transpose_215")];
+            tensor<fp16, [1, 16, 1500, 64]> var_810_cast_fp16 = matmul(transpose_x = var_810_transpose_x_0, transpose_y = var_810_transpose_y_0, x = var_808_cast_fp16, y = v_27_cast_fp16)[name = string("op_810_cast_fp16")];
+            tensor<int32, [4]> var_811 = const()[name = string("op_811"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_6 = const()[name = string("concat_6"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_812_cast_fp16 = transpose(perm = var_811, x = var_810_cast_fp16)[name = string("transpose_212")];
+            tensor<fp16, [1, 1500, 1024]> x_83_cast_fp16 = reshape(shape = concat_6, x = var_812_cast_fp16)[name = string("x_83_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_816_to_fp16 = const()[name = string("op_816_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167309824)))];
+            tensor<fp16, [1024]> var_817_to_fp16 = const()[name = string("op_817_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169407040)))];
+            tensor<fp16, [1, 1500, 1024]> linear_39_cast_fp16 = linear(bias = var_817_to_fp16, weight = var_816_to_fp16, x = x_83_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_85_cast_fp16 = add(x = x_79_cast_fp16, y = linear_39_cast_fp16)[name = string("x_85_cast_fp16")];
+            tensor<int32, [1]> var_824_axes_0 = const()[name = string("op_824_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169409152)))];
+            tensor<fp16, [1024]> blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169411264)))];
+            tensor<fp16, [1, 1500, 1024]> var_824_cast_fp16 = layer_norm(axes = var_824_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_750_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast_fp16)[name = string("op_824_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169413376)))];
+            tensor<fp16, [4096]> var_834_to_fp16 = const()[name = string("op_834_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177802048)))];
+            tensor<fp16, [1, 1500, 4096]> linear_40_cast_fp16 = linear(bias = var_834_to_fp16, weight = var_833_to_fp16, x = var_824_cast_fp16)[name = string("linear_40_cast_fp16")];
+            string x_89_mode_0 = const()[name = string("x_89_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_89_cast_fp16 = gelu(mode = x_89_mode_0, x = linear_40_cast_fp16)[name = string("x_89_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_839_to_fp16 = const()[name = string("op_839_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177810304)))];
+            tensor<fp16, [1024]> var_840_to_fp16 = const()[name = string("op_840_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186198976)))];
+            tensor<fp16, [1, 1500, 1024]> linear_41_cast_fp16 = linear(bias = var_840_to_fp16, weight = var_839_to_fp16, x = x_89_cast_fp16)[name = string("linear_41_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_91_cast_fp16 = add(x = x_85_cast_fp16, y = linear_41_cast_fp16)[name = string("x_91_cast_fp16")];
+            int32 var_850 = const()[name = string("op_850"), val = int32(-1)];
+            tensor<int32, [1]> var_866_axes_0 = const()[name = string("op_866_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186201088)))];
+            tensor<fp16, [1024]> blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186203200)))];
+            fp16 var_856_to_fp16 = const()[name = string("op_856_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_866_cast_fp16 = layer_norm(axes = var_866_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_856_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast_fp16)[name = string("op_866_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_877_to_fp16 = const()[name = string("op_877_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186205312)))];
+            tensor<fp16, [1024]> var_878_to_fp16 = const()[name = string("op_878_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188302528)))];
+            tensor<fp16, [1, 1500, 1024]> linear_42_cast_fp16 = linear(bias = var_878_to_fp16, weight = var_877_to_fp16, x = var_866_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_881_to_fp16 = const()[name = string("op_881_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188304640)))];
+            tensor<fp16, [1, 1500, 1024]> linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_881_to_fp16, x = var_866_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_885_to_fp16 = const()[name = string("op_885_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190401856)))];
+            tensor<fp16, [1024]> var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192499072)))];
+            tensor<fp16, [1, 1500, 1024]> linear_44_cast_fp16 = linear(bias = var_886_to_fp16, weight = var_885_to_fp16, x = var_866_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [4]> var_894 = const()[name = string("op_894"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_895_cast_fp16 = reshape(shape = var_894, x = linear_42_cast_fp16)[name = string("op_895_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_31_cast_fp16 = mul(x = var_895_cast_fp16, y = const_182_to_fp16)[name = string("q_31_cast_fp16")];
+            tensor<int32, [4]> var_901 = const()[name = string("op_901"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_902_cast_fp16 = reshape(shape = var_901, x = linear_43_cast_fp16)[name = string("op_902_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_31_cast_fp16 = mul(x = var_902_cast_fp16, y = const_183_to_fp16)[name = string("k_31_cast_fp16")];
+            tensor<int32, [4]> var_908 = const()[name = string("op_908"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_909_cast_fp16 = reshape(shape = var_908, x = linear_44_cast_fp16)[name = string("op_909_cast_fp16")];
+            tensor<int32, [4]> var_910 = const()[name = string("op_910"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_15_transpose_x_0 = const()[name = string("qk_15_transpose_x_0"), val = bool(false)];
+            bool qk_15_transpose_y_0 = const()[name = string("qk_15_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_110_perm_0 = const()[name = string("transpose_110_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_111_perm_0 = const()[name = string("transpose_111_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_111 = transpose(perm = transpose_111_perm_0, x = k_31_cast_fp16)[name = string("transpose_209")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_110 = transpose(perm = transpose_110_perm_0, x = q_31_cast_fp16)[name = string("transpose_210")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_15_cast_fp16 = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_110, y = transpose_111)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_914_cast_fp16 = softmax(axis = var_850, x = qk_15_cast_fp16)[name = string("op_914_cast_fp16")];
+            bool var_916_transpose_x_0 = const()[name = string("op_916_transpose_x_0"), val = bool(false)];
+            bool var_916_transpose_y_0 = const()[name = string("op_916_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_31_cast_fp16 = transpose(perm = var_910, x = var_909_cast_fp16)[name = string("transpose_211")];
+            tensor<fp16, [1, 16, 1500, 64]> var_916_cast_fp16 = matmul(transpose_x = var_916_transpose_x_0, transpose_y = var_916_transpose_y_0, x = var_914_cast_fp16, y = v_31_cast_fp16)[name = string("op_916_cast_fp16")];
+            tensor<int32, [4]> var_917 = const()[name = string("op_917"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_7 = const()[name = string("concat_7"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_918_cast_fp16 = transpose(perm = var_917, x = var_916_cast_fp16)[name = string("transpose_208")];
+            tensor<fp16, [1, 1500, 1024]> x_95_cast_fp16 = reshape(shape = concat_7, x = var_918_cast_fp16)[name = string("x_95_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192501184)))];
+            tensor<fp16, [1024]> var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194598400)))];
+            tensor<fp16, [1, 1500, 1024]> linear_45_cast_fp16 = linear(bias = var_923_to_fp16, weight = var_922_to_fp16, x = x_95_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_97_cast_fp16 = add(x = x_91_cast_fp16, y = linear_45_cast_fp16)[name = string("x_97_cast_fp16")];
+            tensor<int32, [1]> var_930_axes_0 = const()[name = string("op_930_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194600512)))];
+            tensor<fp16, [1024]> blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194602624)))];
+            tensor<fp16, [1, 1500, 1024]> var_930_cast_fp16 = layer_norm(axes = var_930_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_856_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast_fp16)[name = string("op_930_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_939_to_fp16 = const()[name = string("op_939_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194604736)))];
+            tensor<fp16, [4096]> var_940_to_fp16 = const()[name = string("op_940_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202993408)))];
+            tensor<fp16, [1, 1500, 4096]> linear_46_cast_fp16 = linear(bias = var_940_to_fp16, weight = var_939_to_fp16, x = var_930_cast_fp16)[name = string("linear_46_cast_fp16")];
+            string x_101_mode_0 = const()[name = string("x_101_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_101_cast_fp16 = gelu(mode = x_101_mode_0, x = linear_46_cast_fp16)[name = string("x_101_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_945_to_fp16 = const()[name = string("op_945_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203001664)))];
+            tensor<fp16, [1024]> var_946_to_fp16 = const()[name = string("op_946_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211390336)))];
+            tensor<fp16, [1, 1500, 1024]> linear_47_cast_fp16 = linear(bias = var_946_to_fp16, weight = var_945_to_fp16, x = x_101_cast_fp16)[name = string("linear_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_47_cast_fp16)[name = string("x_103_cast_fp16")];
+            int32 var_956 = const()[name = string("op_956"), val = int32(-1)];
+            tensor<int32, [1]> var_972_axes_0 = const()[name = string("op_972_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211392448)))];
+            tensor<fp16, [1024]> blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211394560)))];
+            fp16 var_962_to_fp16 = const()[name = string("op_962_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_972_cast_fp16 = layer_norm(axes = var_972_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_962_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast_fp16)[name = string("op_972_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_983_to_fp16 = const()[name = string("op_983_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211396672)))];
+            tensor<fp16, [1024]> var_984_to_fp16 = const()[name = string("op_984_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213493888)))];
+            tensor<fp16, [1, 1500, 1024]> linear_48_cast_fp16 = linear(bias = var_984_to_fp16, weight = var_983_to_fp16, x = var_972_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_987_to_fp16 = const()[name = string("op_987_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213496000)))];
+            tensor<fp16, [1, 1500, 1024]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_987_to_fp16, x = var_972_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_991_to_fp16 = const()[name = string("op_991_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215593216)))];
+            tensor<fp16, [1024]> var_992_to_fp16 = const()[name = string("op_992_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217690432)))];
+            tensor<fp16, [1, 1500, 1024]> linear_50_cast_fp16 = linear(bias = var_992_to_fp16, weight = var_991_to_fp16, x = var_972_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [4]> var_1000 = const()[name = string("op_1000"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1001_cast_fp16 = reshape(shape = var_1000, x = linear_48_cast_fp16)[name = string("op_1001_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_35_cast_fp16 = mul(x = var_1001_cast_fp16, y = const_184_to_fp16)[name = string("q_35_cast_fp16")];
+            tensor<int32, [4]> var_1007 = const()[name = string("op_1007"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1008_cast_fp16 = reshape(shape = var_1007, x = linear_49_cast_fp16)[name = string("op_1008_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_35_cast_fp16 = mul(x = var_1008_cast_fp16, y = const_185_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> var_1014 = const()[name = string("op_1014"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1015_cast_fp16 = reshape(shape = var_1014, x = linear_50_cast_fp16)[name = string("op_1015_cast_fp16")];
+            tensor<int32, [4]> var_1016 = const()[name = string("op_1016"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_112_perm_0 = const()[name = string("transpose_112_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_113_perm_0 = const()[name = string("transpose_113_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_113 = transpose(perm = transpose_113_perm_0, x = k_35_cast_fp16)[name = string("transpose_205")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_112 = transpose(perm = transpose_112_perm_0, x = q_35_cast_fp16)[name = string("transpose_206")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_112, y = transpose_113)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1020_cast_fp16 = softmax(axis = var_956, x = qk_17_cast_fp16)[name = string("op_1020_cast_fp16")];
+            bool var_1022_transpose_x_0 = const()[name = string("op_1022_transpose_x_0"), val = bool(false)];
+            bool var_1022_transpose_y_0 = const()[name = string("op_1022_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_35_cast_fp16 = transpose(perm = var_1016, x = var_1015_cast_fp16)[name = string("transpose_207")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1022_cast_fp16 = matmul(transpose_x = var_1022_transpose_x_0, transpose_y = var_1022_transpose_y_0, x = var_1020_cast_fp16, y = v_35_cast_fp16)[name = string("op_1022_cast_fp16")];
+            tensor<int32, [4]> var_1023 = const()[name = string("op_1023"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1024_cast_fp16 = transpose(perm = var_1023, x = var_1022_cast_fp16)[name = string("transpose_204")];
+            tensor<fp16, [1, 1500, 1024]> x_107_cast_fp16 = reshape(shape = concat_8, x = var_1024_cast_fp16)[name = string("x_107_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1028_to_fp16 = const()[name = string("op_1028_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217692544)))];
+            tensor<fp16, [1024]> var_1029_to_fp16 = const()[name = string("op_1029_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219789760)))];
+            tensor<fp16, [1, 1500, 1024]> linear_51_cast_fp16 = linear(bias = var_1029_to_fp16, weight = var_1028_to_fp16, x = x_107_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_109_cast_fp16 = add(x = x_103_cast_fp16, y = linear_51_cast_fp16)[name = string("x_109_cast_fp16")];
+            tensor<int32, [1]> var_1036_axes_0 = const()[name = string("op_1036_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219791872)))];
+            tensor<fp16, [1024]> blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219793984)))];
+            tensor<fp16, [1, 1500, 1024]> var_1036_cast_fp16 = layer_norm(axes = var_1036_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_962_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast_fp16)[name = string("op_1036_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1045_to_fp16 = const()[name = string("op_1045_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219796096)))];
+            tensor<fp16, [4096]> var_1046_to_fp16 = const()[name = string("op_1046_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228184768)))];
+            tensor<fp16, [1, 1500, 4096]> linear_52_cast_fp16 = linear(bias = var_1046_to_fp16, weight = var_1045_to_fp16, x = var_1036_cast_fp16)[name = string("linear_52_cast_fp16")];
+            string x_113_mode_0 = const()[name = string("x_113_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_113_cast_fp16 = gelu(mode = x_113_mode_0, x = linear_52_cast_fp16)[name = string("x_113_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1051_to_fp16 = const()[name = string("op_1051_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228193024)))];
+            tensor<fp16, [1024]> var_1052_to_fp16 = const()[name = string("op_1052_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236581696)))];
+            tensor<fp16, [1, 1500, 1024]> linear_53_cast_fp16 = linear(bias = var_1052_to_fp16, weight = var_1051_to_fp16, x = x_113_cast_fp16)[name = string("linear_53_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_115_cast_fp16 = add(x = x_109_cast_fp16, y = linear_53_cast_fp16)[name = string("x_115_cast_fp16")];
+            int32 var_1062 = const()[name = string("op_1062"), val = int32(-1)];
+            tensor<int32, [1]> var_1078_axes_0 = const()[name = string("op_1078_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236583808)))];
+            tensor<fp16, [1024]> blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236585920)))];
+            fp16 var_1068_to_fp16 = const()[name = string("op_1068_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_1078_cast_fp16 = layer_norm(axes = var_1078_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1068_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast_fp16)[name = string("op_1078_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1089_to_fp16 = const()[name = string("op_1089_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236588032)))];
+            tensor<fp16, [1024]> var_1090_to_fp16 = const()[name = string("op_1090_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238685248)))];
+            tensor<fp16, [1, 1500, 1024]> linear_54_cast_fp16 = linear(bias = var_1090_to_fp16, weight = var_1089_to_fp16, x = var_1078_cast_fp16)[name = string("linear_54_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1093_to_fp16 = const()[name = string("op_1093_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238687360)))];
+            tensor<fp16, [1, 1500, 1024]> linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1093_to_fp16, x = var_1078_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1097_to_fp16 = const()[name = string("op_1097_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240784576)))];
+            tensor<fp16, [1024]> var_1098_to_fp16 = const()[name = string("op_1098_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242881792)))];
+            tensor<fp16, [1, 1500, 1024]> linear_56_cast_fp16 = linear(bias = var_1098_to_fp16, weight = var_1097_to_fp16, x = var_1078_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<int32, [4]> var_1106 = const()[name = string("op_1106"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1107_cast_fp16 = reshape(shape = var_1106, x = linear_54_cast_fp16)[name = string("op_1107_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_39_cast_fp16 = mul(x = var_1107_cast_fp16, y = const_186_to_fp16)[name = string("q_39_cast_fp16")];
+            tensor<int32, [4]> var_1113 = const()[name = string("op_1113"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1114_cast_fp16 = reshape(shape = var_1113, x = linear_55_cast_fp16)[name = string("op_1114_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_39_cast_fp16 = mul(x = var_1114_cast_fp16, y = const_187_to_fp16)[name = string("k_39_cast_fp16")];
+            tensor<int32, [4]> var_1120 = const()[name = string("op_1120"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1121_cast_fp16 = reshape(shape = var_1120, x = linear_56_cast_fp16)[name = string("op_1121_cast_fp16")];
+            tensor<int32, [4]> var_1122 = const()[name = string("op_1122"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_114_perm_0 = const()[name = string("transpose_114_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_115_perm_0 = const()[name = string("transpose_115_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_115 = transpose(perm = transpose_115_perm_0, x = k_39_cast_fp16)[name = string("transpose_201")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_114 = transpose(perm = transpose_114_perm_0, x = q_39_cast_fp16)[name = string("transpose_202")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_114, y = transpose_115)[name = string("qk_19_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1126_cast_fp16 = softmax(axis = var_1062, x = qk_19_cast_fp16)[name = string("op_1126_cast_fp16")];
+            bool var_1128_transpose_x_0 = const()[name = string("op_1128_transpose_x_0"), val = bool(false)];
+            bool var_1128_transpose_y_0 = const()[name = string("op_1128_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_1122, x = var_1121_cast_fp16)[name = string("transpose_203")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1128_cast_fp16 = matmul(transpose_x = var_1128_transpose_x_0, transpose_y = var_1128_transpose_y_0, x = var_1126_cast_fp16, y = v_39_cast_fp16)[name = string("op_1128_cast_fp16")];
+            tensor<int32, [4]> var_1129 = const()[name = string("op_1129"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_9 = const()[name = string("concat_9"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1130_cast_fp16 = transpose(perm = var_1129, x = var_1128_cast_fp16)[name = string("transpose_200")];
+            tensor<fp16, [1, 1500, 1024]> x_119_cast_fp16 = reshape(shape = concat_9, x = var_1130_cast_fp16)[name = string("x_119_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1134_to_fp16 = const()[name = string("op_1134_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242883904)))];
+            tensor<fp16, [1024]> var_1135_to_fp16 = const()[name = string("op_1135_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244981120)))];
+            tensor<fp16, [1, 1500, 1024]> linear_57_cast_fp16 = linear(bias = var_1135_to_fp16, weight = var_1134_to_fp16, x = x_119_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_121_cast_fp16 = add(x = x_115_cast_fp16, y = linear_57_cast_fp16)[name = string("x_121_cast_fp16")];
+            tensor<int32, [1]> var_1142_axes_0 = const()[name = string("op_1142_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244983232)))];
+            tensor<fp16, [1024]> blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244985344)))];
+            tensor<fp16, [1, 1500, 1024]> var_1142_cast_fp16 = layer_norm(axes = var_1142_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1068_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast_fp16)[name = string("op_1142_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244987456)))];
+            tensor<fp16, [4096]> var_1152_to_fp16 = const()[name = string("op_1152_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253376128)))];
+            tensor<fp16, [1, 1500, 4096]> linear_58_cast_fp16 = linear(bias = var_1152_to_fp16, weight = var_1151_to_fp16, x = var_1142_cast_fp16)[name = string("linear_58_cast_fp16")];
+            string x_125_mode_0 = const()[name = string("x_125_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = linear_58_cast_fp16)[name = string("x_125_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253384384)))];
+            tensor<fp16, [1024]> var_1158_to_fp16 = const()[name = string("op_1158_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261773056)))];
+            tensor<fp16, [1, 1500, 1024]> linear_59_cast_fp16 = linear(bias = var_1158_to_fp16, weight = var_1157_to_fp16, x = x_125_cast_fp16)[name = string("linear_59_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_127_cast_fp16 = add(x = x_121_cast_fp16, y = linear_59_cast_fp16)[name = string("x_127_cast_fp16")];
+            int32 var_1168 = const()[name = string("op_1168"), val = int32(-1)];
+            tensor<int32, [1]> var_1184_axes_0 = const()[name = string("op_1184_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261775168)))];
+            tensor<fp16, [1024]> blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261777280)))];
+            fp16 var_1174_to_fp16 = const()[name = string("op_1174_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_1184_cast_fp16 = layer_norm(axes = var_1184_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1174_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast_fp16)[name = string("op_1184_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1195_to_fp16 = const()[name = string("op_1195_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261779392)))];
+            tensor<fp16, [1024]> var_1196_to_fp16 = const()[name = string("op_1196_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263876608)))];
+            tensor<fp16, [1, 1500, 1024]> linear_60_cast_fp16 = linear(bias = var_1196_to_fp16, weight = var_1195_to_fp16, x = var_1184_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1199_to_fp16 = const()[name = string("op_1199_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263878720)))];
+            tensor<fp16, [1, 1500, 1024]> linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1199_to_fp16, x = var_1184_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1203_to_fp16 = const()[name = string("op_1203_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265975936)))];
+            tensor<fp16, [1024]> var_1204_to_fp16 = const()[name = string("op_1204_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268073152)))];
+            tensor<fp16, [1, 1500, 1024]> linear_62_cast_fp16 = linear(bias = var_1204_to_fp16, weight = var_1203_to_fp16, x = var_1184_cast_fp16)[name = string("linear_62_cast_fp16")];
+            tensor<int32, [4]> var_1212 = const()[name = string("op_1212"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1213_cast_fp16 = reshape(shape = var_1212, x = linear_60_cast_fp16)[name = string("op_1213_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_43_cast_fp16 = mul(x = var_1213_cast_fp16, y = const_188_to_fp16)[name = string("q_43_cast_fp16")];
+            tensor<int32, [4]> var_1219 = const()[name = string("op_1219"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1220_cast_fp16 = reshape(shape = var_1219, x = linear_61_cast_fp16)[name = string("op_1220_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_43_cast_fp16 = mul(x = var_1220_cast_fp16, y = const_189_to_fp16)[name = string("k_43_cast_fp16")];
+            tensor<int32, [4]> var_1226 = const()[name = string("op_1226"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1227_cast_fp16 = reshape(shape = var_1226, x = linear_62_cast_fp16)[name = string("op_1227_cast_fp16")];
+            tensor<int32, [4]> var_1228 = const()[name = string("op_1228"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_21_transpose_x_0 = const()[name = string("qk_21_transpose_x_0"), val = bool(false)];
+            bool qk_21_transpose_y_0 = const()[name = string("qk_21_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_116_perm_0 = const()[name = string("transpose_116_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_117_perm_0 = const()[name = string("transpose_117_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_117 = transpose(perm = transpose_117_perm_0, x = k_43_cast_fp16)[name = string("transpose_197")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_116 = transpose(perm = transpose_116_perm_0, x = q_43_cast_fp16)[name = string("transpose_198")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_21_cast_fp16 = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_116, y = transpose_117)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1232_cast_fp16 = softmax(axis = var_1168, x = qk_21_cast_fp16)[name = string("op_1232_cast_fp16")];
+            bool var_1234_transpose_x_0 = const()[name = string("op_1234_transpose_x_0"), val = bool(false)];
+            bool var_1234_transpose_y_0 = const()[name = string("op_1234_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_43_cast_fp16 = transpose(perm = var_1228, x = var_1227_cast_fp16)[name = string("transpose_199")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1234_cast_fp16 = matmul(transpose_x = var_1234_transpose_x_0, transpose_y = var_1234_transpose_y_0, x = var_1232_cast_fp16, y = v_43_cast_fp16)[name = string("op_1234_cast_fp16")];
+            tensor<int32, [4]> var_1235 = const()[name = string("op_1235"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_10 = const()[name = string("concat_10"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1236_cast_fp16 = transpose(perm = var_1235, x = var_1234_cast_fp16)[name = string("transpose_196")];
+            tensor<fp16, [1, 1500, 1024]> x_131_cast_fp16 = reshape(shape = concat_10, x = var_1236_cast_fp16)[name = string("x_131_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1240_to_fp16 = const()[name = string("op_1240_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268075264)))];
+            tensor<fp16, [1024]> var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270172480)))];
+            tensor<fp16, [1, 1500, 1024]> linear_63_cast_fp16 = linear(bias = var_1241_to_fp16, weight = var_1240_to_fp16, x = x_131_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_133_cast_fp16 = add(x = x_127_cast_fp16, y = linear_63_cast_fp16)[name = string("x_133_cast_fp16")];
+            tensor<int32, [1]> var_1248_axes_0 = const()[name = string("op_1248_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270174592)))];
+            tensor<fp16, [1024]> blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270176704)))];
+            tensor<fp16, [1, 1500, 1024]> var_1248_cast_fp16 = layer_norm(axes = var_1248_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1174_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast_fp16)[name = string("op_1248_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1257_to_fp16 = const()[name = string("op_1257_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270178816)))];
+            tensor<fp16, [4096]> var_1258_to_fp16 = const()[name = string("op_1258_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278567488)))];
+            tensor<fp16, [1, 1500, 4096]> linear_64_cast_fp16 = linear(bias = var_1258_to_fp16, weight = var_1257_to_fp16, x = var_1248_cast_fp16)[name = string("linear_64_cast_fp16")];
+            string x_137_mode_0 = const()[name = string("x_137_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_137_cast_fp16 = gelu(mode = x_137_mode_0, x = linear_64_cast_fp16)[name = string("x_137_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278575744)))];
+            tensor<fp16, [1024]> var_1264_to_fp16 = const()[name = string("op_1264_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286964416)))];
+            tensor<fp16, [1, 1500, 1024]> linear_65_cast_fp16 = linear(bias = var_1264_to_fp16, weight = var_1263_to_fp16, x = x_137_cast_fp16)[name = string("linear_65_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_139_cast_fp16 = add(x = x_133_cast_fp16, y = linear_65_cast_fp16)[name = string("x_139_cast_fp16")];
+            int32 var_1274 = const()[name = string("op_1274"), val = int32(-1)];
+            tensor<int32, [1]> var_1290_axes_0 = const()[name = string("op_1290_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286966528)))];
+            tensor<fp16, [1024]> blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286968640)))];
+            fp16 var_1280_to_fp16 = const()[name = string("op_1280_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_1290_cast_fp16 = layer_norm(axes = var_1290_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1280_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast_fp16)[name = string("op_1290_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1301_to_fp16 = const()[name = string("op_1301_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286970752)))];
+            tensor<fp16, [1024]> var_1302_to_fp16 = const()[name = string("op_1302_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289067968)))];
+            tensor<fp16, [1, 1500, 1024]> linear_66_cast_fp16 = linear(bias = var_1302_to_fp16, weight = var_1301_to_fp16, x = var_1290_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1305_to_fp16 = const()[name = string("op_1305_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289070080)))];
+            tensor<fp16, [1, 1500, 1024]> linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1305_to_fp16, x = var_1290_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1309_to_fp16 = const()[name = string("op_1309_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291167296)))];
+            tensor<fp16, [1024]> var_1310_to_fp16 = const()[name = string("op_1310_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293264512)))];
+            tensor<fp16, [1, 1500, 1024]> linear_68_cast_fp16 = linear(bias = var_1310_to_fp16, weight = var_1309_to_fp16, x = var_1290_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [4]> var_1318 = const()[name = string("op_1318"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1319_cast_fp16 = reshape(shape = var_1318, x = linear_66_cast_fp16)[name = string("op_1319_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_47_cast_fp16 = mul(x = var_1319_cast_fp16, y = const_190_to_fp16)[name = string("q_47_cast_fp16")];
+            tensor<int32, [4]> var_1325 = const()[name = string("op_1325"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1326_cast_fp16 = reshape(shape = var_1325, x = linear_67_cast_fp16)[name = string("op_1326_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_47_cast_fp16 = mul(x = var_1326_cast_fp16, y = const_191_to_fp16)[name = string("k_47_cast_fp16")];
+            tensor<int32, [4]> var_1332 = const()[name = string("op_1332"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1333_cast_fp16 = reshape(shape = var_1332, x = linear_68_cast_fp16)[name = string("op_1333_cast_fp16")];
+            tensor<int32, [4]> var_1334 = const()[name = string("op_1334"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)];
+            bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_118_perm_0 = const()[name = string("transpose_118_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_119_perm_0 = const()[name = string("transpose_119_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_119 = transpose(perm = transpose_119_perm_0, x = k_47_cast_fp16)[name = string("transpose_193")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_118 = transpose(perm = transpose_118_perm_0, x = q_47_cast_fp16)[name = string("transpose_194")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_118, y = transpose_119)[name = string("qk_23_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1338_cast_fp16 = softmax(axis = var_1274, x = qk_23_cast_fp16)[name = string("op_1338_cast_fp16")];
+            bool var_1340_transpose_x_0 = const()[name = string("op_1340_transpose_x_0"), val = bool(false)];
+            bool var_1340_transpose_y_0 = const()[name = string("op_1340_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_47_cast_fp16 = transpose(perm = var_1334, x = var_1333_cast_fp16)[name = string("transpose_195")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1340_cast_fp16 = matmul(transpose_x = var_1340_transpose_x_0, transpose_y = var_1340_transpose_y_0, x = var_1338_cast_fp16, y = v_47_cast_fp16)[name = string("op_1340_cast_fp16")];
+            tensor<int32, [4]> var_1341 = const()[name = string("op_1341"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1342_cast_fp16 = transpose(perm = var_1341, x = var_1340_cast_fp16)[name = string("transpose_192")];
+            tensor<fp16, [1, 1500, 1024]> x_143_cast_fp16 = reshape(shape = concat_11, x = var_1342_cast_fp16)[name = string("x_143_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1346_to_fp16 = const()[name = string("op_1346_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293266624)))];
+            tensor<fp16, [1024]> var_1347_to_fp16 = const()[name = string("op_1347_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295363840)))];
+            tensor<fp16, [1, 1500, 1024]> linear_69_cast_fp16 = linear(bias = var_1347_to_fp16, weight = var_1346_to_fp16, x = x_143_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_145_cast_fp16 = add(x = x_139_cast_fp16, y = linear_69_cast_fp16)[name = string("x_145_cast_fp16")];
+            tensor<int32, [1]> var_1354_axes_0 = const()[name = string("op_1354_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295365952)))];
+            tensor<fp16, [1024]> blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295368064)))];
+            tensor<fp16, [1, 1500, 1024]> var_1354_cast_fp16 = layer_norm(axes = var_1354_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1280_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast_fp16)[name = string("op_1354_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1363_to_fp16 = const()[name = string("op_1363_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295370176)))];
+            tensor<fp16, [4096]> var_1364_to_fp16 = const()[name = string("op_1364_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303758848)))];
+            tensor<fp16, [1, 1500, 4096]> linear_70_cast_fp16 = linear(bias = var_1364_to_fp16, weight = var_1363_to_fp16, x = var_1354_cast_fp16)[name = string("linear_70_cast_fp16")];
+            string x_149_mode_0 = const()[name = string("x_149_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_149_cast_fp16 = gelu(mode = x_149_mode_0, x = linear_70_cast_fp16)[name = string("x_149_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1369_to_fp16 = const()[name = string("op_1369_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303767104)))];
+            tensor<fp16, [1024]> var_1370_to_fp16 = const()[name = string("op_1370_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312155776)))];
+            tensor<fp16, [1, 1500, 1024]> linear_71_cast_fp16 = linear(bias = var_1370_to_fp16, weight = var_1369_to_fp16, x = x_149_cast_fp16)[name = string("linear_71_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_151_cast_fp16 = add(x = x_145_cast_fp16, y = linear_71_cast_fp16)[name = string("x_151_cast_fp16")];
+            int32 var_1380 = const()[name = string("op_1380"), val = int32(-1)];
+            tensor<int32, [1]> var_1396_axes_0 = const()[name = string("op_1396_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312157888)))];
+            tensor<fp16, [1024]> blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312160000)))];
+            fp16 var_1386_to_fp16 = const()[name = string("op_1386_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_1396_cast_fp16 = layer_norm(axes = var_1396_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_1386_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_151_cast_fp16)[name = string("op_1396_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1407_to_fp16 = const()[name = string("op_1407_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312162112)))];
+            tensor<fp16, [1024]> var_1408_to_fp16 = const()[name = string("op_1408_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314259328)))];
+            tensor<fp16, [1, 1500, 1024]> linear_72_cast_fp16 = linear(bias = var_1408_to_fp16, weight = var_1407_to_fp16, x = var_1396_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1411_to_fp16 = const()[name = string("op_1411_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314261440)))];
+            tensor<fp16, [1, 1500, 1024]> linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1411_to_fp16, x = var_1396_cast_fp16)[name = string("linear_73_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1415_to_fp16 = const()[name = string("op_1415_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316358656)))];
+            tensor<fp16, [1024]> var_1416_to_fp16 = const()[name = string("op_1416_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318455872)))];
+            tensor<fp16, [1, 1500, 1024]> linear_74_cast_fp16 = linear(bias = var_1416_to_fp16, weight = var_1415_to_fp16, x = var_1396_cast_fp16)[name = string("linear_74_cast_fp16")];
+            tensor<int32, [4]> var_1424 = const()[name = string("op_1424"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1425_cast_fp16 = reshape(shape = var_1424, x = linear_72_cast_fp16)[name = string("op_1425_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_51_cast_fp16 = mul(x = var_1425_cast_fp16, y = const_192_to_fp16)[name = string("q_51_cast_fp16")];
+            tensor<int32, [4]> var_1431 = const()[name = string("op_1431"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1432_cast_fp16 = reshape(shape = var_1431, x = linear_73_cast_fp16)[name = string("op_1432_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_51_cast_fp16 = mul(x = var_1432_cast_fp16, y = const_193_to_fp16)[name = string("k_51_cast_fp16")];
+            tensor<int32, [4]> var_1438 = const()[name = string("op_1438"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1439_cast_fp16 = reshape(shape = var_1438, x = linear_74_cast_fp16)[name = string("op_1439_cast_fp16")];
+            tensor<int32, [4]> var_1440 = const()[name = string("op_1440"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)];
+            bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_120_perm_0 = const()[name = string("transpose_120_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_121_perm_0 = const()[name = string("transpose_121_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_121 = transpose(perm = transpose_121_perm_0, x = k_51_cast_fp16)[name = string("transpose_189")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_120 = transpose(perm = transpose_120_perm_0, x = q_51_cast_fp16)[name = string("transpose_190")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_120, y = transpose_121)[name = string("qk_25_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1444_cast_fp16 = softmax(axis = var_1380, x = qk_25_cast_fp16)[name = string("op_1444_cast_fp16")];
+            bool var_1446_transpose_x_0 = const()[name = string("op_1446_transpose_x_0"), val = bool(false)];
+            bool var_1446_transpose_y_0 = const()[name = string("op_1446_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_51_cast_fp16 = transpose(perm = var_1440, x = var_1439_cast_fp16)[name = string("transpose_191")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1446_cast_fp16 = matmul(transpose_x = var_1446_transpose_x_0, transpose_y = var_1446_transpose_y_0, x = var_1444_cast_fp16, y = v_51_cast_fp16)[name = string("op_1446_cast_fp16")];
+            tensor<int32, [4]> var_1447 = const()[name = string("op_1447"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_12 = const()[name = string("concat_12"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1448_cast_fp16 = transpose(perm = var_1447, x = var_1446_cast_fp16)[name = string("transpose_188")];
+            tensor<fp16, [1, 1500, 1024]> x_155_cast_fp16 = reshape(shape = concat_12, x = var_1448_cast_fp16)[name = string("x_155_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318457984)))];
+            tensor<fp16, [1024]> var_1453_to_fp16 = const()[name = string("op_1453_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320555200)))];
+            tensor<fp16, [1, 1500, 1024]> linear_75_cast_fp16 = linear(bias = var_1453_to_fp16, weight = var_1452_to_fp16, x = x_155_cast_fp16)[name = string("linear_75_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_157_cast_fp16 = add(x = x_151_cast_fp16, y = linear_75_cast_fp16)[name = string("x_157_cast_fp16")];
+            tensor<int32, [1]> var_1460_axes_0 = const()[name = string("op_1460_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320557312)))];
+            tensor<fp16, [1024]> blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320559424)))];
+            tensor<fp16, [1, 1500, 1024]> var_1460_cast_fp16 = layer_norm(axes = var_1460_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_1386_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_157_cast_fp16)[name = string("op_1460_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1469_to_fp16 = const()[name = string("op_1469_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320561536)))];
+            tensor<fp16, [4096]> var_1470_to_fp16 = const()[name = string("op_1470_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328950208)))];
+            tensor<fp16, [1, 1500, 4096]> linear_76_cast_fp16 = linear(bias = var_1470_to_fp16, weight = var_1469_to_fp16, x = var_1460_cast_fp16)[name = string("linear_76_cast_fp16")];
+            string x_161_mode_0 = const()[name = string("x_161_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_161_cast_fp16 = gelu(mode = x_161_mode_0, x = linear_76_cast_fp16)[name = string("x_161_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1475_to_fp16 = const()[name = string("op_1475_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328958464)))];
+            tensor<fp16, [1024]> var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337347136)))];
+            tensor<fp16, [1, 1500, 1024]> linear_77_cast_fp16 = linear(bias = var_1476_to_fp16, weight = var_1475_to_fp16, x = x_161_cast_fp16)[name = string("linear_77_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_163_cast_fp16 = add(x = x_157_cast_fp16, y = linear_77_cast_fp16)[name = string("x_163_cast_fp16")];
+            int32 var_1486 = const()[name = string("op_1486"), val = int32(-1)];
+            tensor<int32, [1]> var_1502_axes_0 = const()[name = string("op_1502_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337349248)))];
+            tensor<fp16, [1024]> blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337351360)))];
+            fp16 var_1492_to_fp16 = const()[name = string("op_1492_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_1502_cast_fp16 = layer_norm(axes = var_1502_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_1492_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_163_cast_fp16)[name = string("op_1502_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1513_to_fp16 = const()[name = string("op_1513_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337353472)))];
+            tensor<fp16, [1024]> var_1514_to_fp16 = const()[name = string("op_1514_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339450688)))];
+            tensor<fp16, [1, 1500, 1024]> linear_78_cast_fp16 = linear(bias = var_1514_to_fp16, weight = var_1513_to_fp16, x = var_1502_cast_fp16)[name = string("linear_78_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1517_to_fp16 = const()[name = string("op_1517_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339452800)))];
+            tensor<fp16, [1, 1500, 1024]> linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1517_to_fp16, x = var_1502_cast_fp16)[name = string("linear_79_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1521_to_fp16 = const()[name = string("op_1521_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341550016)))];
+            tensor<fp16, [1024]> var_1522_to_fp16 = const()[name = string("op_1522_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343647232)))];
+            tensor<fp16, [1, 1500, 1024]> linear_80_cast_fp16 = linear(bias = var_1522_to_fp16, weight = var_1521_to_fp16, x = var_1502_cast_fp16)[name = string("linear_80_cast_fp16")];
+            tensor<int32, [4]> var_1530 = const()[name = string("op_1530"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1531_cast_fp16 = reshape(shape = var_1530, x = linear_78_cast_fp16)[name = string("op_1531_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_55_cast_fp16 = mul(x = var_1531_cast_fp16, y = const_194_to_fp16)[name = string("q_55_cast_fp16")];
+            tensor<int32, [4]> var_1537 = const()[name = string("op_1537"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1538_cast_fp16 = reshape(shape = var_1537, x = linear_79_cast_fp16)[name = string("op_1538_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_55_cast_fp16 = mul(x = var_1538_cast_fp16, y = const_195_to_fp16)[name = string("k_55_cast_fp16")];
+            tensor<int32, [4]> var_1544 = const()[name = string("op_1544"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1545_cast_fp16 = reshape(shape = var_1544, x = linear_80_cast_fp16)[name = string("op_1545_cast_fp16")];
+            tensor<int32, [4]> var_1546 = const()[name = string("op_1546"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_27_transpose_x_0 = const()[name = string("qk_27_transpose_x_0"), val = bool(false)];
+            bool qk_27_transpose_y_0 = const()[name = string("qk_27_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_122_perm_0 = const()[name = string("transpose_122_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_123_perm_0 = const()[name = string("transpose_123_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_123 = transpose(perm = transpose_123_perm_0, x = k_55_cast_fp16)[name = string("transpose_185")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_122 = transpose(perm = transpose_122_perm_0, x = q_55_cast_fp16)[name = string("transpose_186")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_27_cast_fp16 = matmul(transpose_x = qk_27_transpose_x_0, transpose_y = qk_27_transpose_y_0, x = transpose_122, y = transpose_123)[name = string("qk_27_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1550_cast_fp16 = softmax(axis = var_1486, x = qk_27_cast_fp16)[name = string("op_1550_cast_fp16")];
+            bool var_1552_transpose_x_0 = const()[name = string("op_1552_transpose_x_0"), val = bool(false)];
+            bool var_1552_transpose_y_0 = const()[name = string("op_1552_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_55_cast_fp16 = transpose(perm = var_1546, x = var_1545_cast_fp16)[name = string("transpose_187")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1552_cast_fp16 = matmul(transpose_x = var_1552_transpose_x_0, transpose_y = var_1552_transpose_y_0, x = var_1550_cast_fp16, y = v_55_cast_fp16)[name = string("op_1552_cast_fp16")];
+            tensor<int32, [4]> var_1553 = const()[name = string("op_1553"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_13 = const()[name = string("concat_13"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1554_cast_fp16 = transpose(perm = var_1553, x = var_1552_cast_fp16)[name = string("transpose_184")];
+            tensor<fp16, [1, 1500, 1024]> x_167_cast_fp16 = reshape(shape = concat_13, x = var_1554_cast_fp16)[name = string("x_167_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1558_to_fp16 = const()[name = string("op_1558_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343649344)))];
+            tensor<fp16, [1024]> var_1559_to_fp16 = const()[name = string("op_1559_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345746560)))];
+            tensor<fp16, [1, 1500, 1024]> linear_81_cast_fp16 = linear(bias = var_1559_to_fp16, weight = var_1558_to_fp16, x = x_167_cast_fp16)[name = string("linear_81_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_169_cast_fp16 = add(x = x_163_cast_fp16, y = linear_81_cast_fp16)[name = string("x_169_cast_fp16")];
+            tensor<int32, [1]> var_1566_axes_0 = const()[name = string("op_1566_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345748672)))];
+            tensor<fp16, [1024]> blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345750784)))];
+            tensor<fp16, [1, 1500, 1024]> var_1566_cast_fp16 = layer_norm(axes = var_1566_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_1492_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_169_cast_fp16)[name = string("op_1566_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1575_to_fp16 = const()[name = string("op_1575_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345752896)))];
+            tensor<fp16, [4096]> var_1576_to_fp16 = const()[name = string("op_1576_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354141568)))];
+            tensor<fp16, [1, 1500, 4096]> linear_82_cast_fp16 = linear(bias = var_1576_to_fp16, weight = var_1575_to_fp16, x = var_1566_cast_fp16)[name = string("linear_82_cast_fp16")];
+            string x_173_mode_0 = const()[name = string("x_173_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_173_cast_fp16 = gelu(mode = x_173_mode_0, x = linear_82_cast_fp16)[name = string("x_173_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1581_to_fp16 = const()[name = string("op_1581_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354149824)))];
+            tensor<fp16, [1024]> var_1582_to_fp16 = const()[name = string("op_1582_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362538496)))];
+            tensor<fp16, [1, 1500, 1024]> linear_83_cast_fp16 = linear(bias = var_1582_to_fp16, weight = var_1581_to_fp16, x = x_173_cast_fp16)[name = string("linear_83_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_175_cast_fp16 = add(x = x_169_cast_fp16, y = linear_83_cast_fp16)[name = string("x_175_cast_fp16")];
+            int32 var_1592 = const()[name = string("op_1592"), val = int32(-1)];
+            tensor<int32, [1]> var_1608_axes_0 = const()[name = string("op_1608_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362540608)))];
+            tensor<fp16, [1024]> blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362542720)))];
+            fp16 var_1598_to_fp16 = const()[name = string("op_1598_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_1608_cast_fp16 = layer_norm(axes = var_1608_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_1598_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_175_cast_fp16)[name = string("op_1608_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1619_to_fp16 = const()[name = string("op_1619_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362544832)))];
+            tensor<fp16, [1024]> var_1620_to_fp16 = const()[name = string("op_1620_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364642048)))];
+            tensor<fp16, [1, 1500, 1024]> linear_84_cast_fp16 = linear(bias = var_1620_to_fp16, weight = var_1619_to_fp16, x = var_1608_cast_fp16)[name = string("linear_84_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1623_to_fp16 = const()[name = string("op_1623_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364644160)))];
+            tensor<fp16, [1, 1500, 1024]> linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1623_to_fp16, x = var_1608_cast_fp16)[name = string("linear_85_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1627_to_fp16 = const()[name = string("op_1627_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366741376)))];
+            tensor<fp16, [1024]> var_1628_to_fp16 = const()[name = string("op_1628_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368838592)))];
+            tensor<fp16, [1, 1500, 1024]> linear_86_cast_fp16 = linear(bias = var_1628_to_fp16, weight = var_1627_to_fp16, x = var_1608_cast_fp16)[name = string("linear_86_cast_fp16")];
+            tensor<int32, [4]> var_1636 = const()[name = string("op_1636"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1637_cast_fp16 = reshape(shape = var_1636, x = linear_84_cast_fp16)[name = string("op_1637_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_59_cast_fp16 = mul(x = var_1637_cast_fp16, y = const_196_to_fp16)[name = string("q_59_cast_fp16")];
+            tensor<int32, [4]> var_1643 = const()[name = string("op_1643"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1644_cast_fp16 = reshape(shape = var_1643, x = linear_85_cast_fp16)[name = string("op_1644_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_59_cast_fp16 = mul(x = var_1644_cast_fp16, y = const_197_to_fp16)[name = string("k_59_cast_fp16")];
+            tensor<int32, [4]> var_1650 = const()[name = string("op_1650"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1651_cast_fp16 = reshape(shape = var_1650, x = linear_86_cast_fp16)[name = string("op_1651_cast_fp16")];
+            tensor<int32, [4]> var_1652 = const()[name = string("op_1652"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)];
+            bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_124_perm_0 = const()[name = string("transpose_124_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_125_perm_0 = const()[name = string("transpose_125_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_125 = transpose(perm = transpose_125_perm_0, x = k_59_cast_fp16)[name = string("transpose_181")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_124 = transpose(perm = transpose_124_perm_0, x = q_59_cast_fp16)[name = string("transpose_182")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_124, y = transpose_125)[name = string("qk_29_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1656_cast_fp16 = softmax(axis = var_1592, x = qk_29_cast_fp16)[name = string("op_1656_cast_fp16")];
+            bool var_1658_transpose_x_0 = const()[name = string("op_1658_transpose_x_0"), val = bool(false)];
+            bool var_1658_transpose_y_0 = const()[name = string("op_1658_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_59_cast_fp16 = transpose(perm = var_1652, x = var_1651_cast_fp16)[name = string("transpose_183")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1658_cast_fp16 = matmul(transpose_x = var_1658_transpose_x_0, transpose_y = var_1658_transpose_y_0, x = var_1656_cast_fp16, y = v_59_cast_fp16)[name = string("op_1658_cast_fp16")];
+            tensor<int32, [4]> var_1659 = const()[name = string("op_1659"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1660_cast_fp16 = transpose(perm = var_1659, x = var_1658_cast_fp16)[name = string("transpose_180")];
+            tensor<fp16, [1, 1500, 1024]> x_179_cast_fp16 = reshape(shape = concat_14, x = var_1660_cast_fp16)[name = string("x_179_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1664_to_fp16 = const()[name = string("op_1664_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368840704)))];
+            tensor<fp16, [1024]> var_1665_to_fp16 = const()[name = string("op_1665_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370937920)))];
+            tensor<fp16, [1, 1500, 1024]> linear_87_cast_fp16 = linear(bias = var_1665_to_fp16, weight = var_1664_to_fp16, x = x_179_cast_fp16)[name = string("linear_87_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_181_cast_fp16 = add(x = x_175_cast_fp16, y = linear_87_cast_fp16)[name = string("x_181_cast_fp16")];
+            tensor<int32, [1]> var_1672_axes_0 = const()[name = string("op_1672_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370940032)))];
+            tensor<fp16, [1024]> blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370942144)))];
+            tensor<fp16, [1, 1500, 1024]> var_1672_cast_fp16 = layer_norm(axes = var_1672_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_1598_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_181_cast_fp16)[name = string("op_1672_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1681_to_fp16 = const()[name = string("op_1681_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370944256)))];
+            tensor<fp16, [4096]> var_1682_to_fp16 = const()[name = string("op_1682_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379332928)))];
+            tensor<fp16, [1, 1500, 4096]> linear_88_cast_fp16 = linear(bias = var_1682_to_fp16, weight = var_1681_to_fp16, x = var_1672_cast_fp16)[name = string("linear_88_cast_fp16")];
+            string x_185_mode_0 = const()[name = string("x_185_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_185_cast_fp16 = gelu(mode = x_185_mode_0, x = linear_88_cast_fp16)[name = string("x_185_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1687_to_fp16 = const()[name = string("op_1687_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379341184)))];
+            tensor<fp16, [1024]> var_1688_to_fp16 = const()[name = string("op_1688_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387729856)))];
+            tensor<fp16, [1, 1500, 1024]> linear_89_cast_fp16 = linear(bias = var_1688_to_fp16, weight = var_1687_to_fp16, x = x_185_cast_fp16)[name = string("linear_89_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_187_cast_fp16 = add(x = x_181_cast_fp16, y = linear_89_cast_fp16)[name = string("x_187_cast_fp16")];
+            int32 var_1698 = const()[name = string("op_1698"), val = int32(-1)];
+            tensor<int32, [1]> var_1714_axes_0 = const()[name = string("op_1714_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387731968)))];
+            tensor<fp16, [1024]> blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387734080)))];
+            fp16 var_1704_to_fp16 = const()[name = string("op_1704_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_1714_cast_fp16 = layer_norm(axes = var_1714_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_1704_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_187_cast_fp16)[name = string("op_1714_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387736192)))];
+            tensor<fp16, [1024]> var_1726_to_fp16 = const()[name = string("op_1726_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389833408)))];
+            tensor<fp16, [1, 1500, 1024]> linear_90_cast_fp16 = linear(bias = var_1726_to_fp16, weight = var_1725_to_fp16, x = var_1714_cast_fp16)[name = string("linear_90_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1729_to_fp16 = const()[name = string("op_1729_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389835520)))];
+            tensor<fp16, [1, 1500, 1024]> linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1729_to_fp16, x = var_1714_cast_fp16)[name = string("linear_91_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1733_to_fp16 = const()[name = string("op_1733_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391932736)))];
+            tensor<fp16, [1024]> var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394029952)))];
+            tensor<fp16, [1, 1500, 1024]> linear_92_cast_fp16 = linear(bias = var_1734_to_fp16, weight = var_1733_to_fp16, x = var_1714_cast_fp16)[name = string("linear_92_cast_fp16")];
+            tensor<int32, [4]> var_1742 = const()[name = string("op_1742"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1743_cast_fp16 = reshape(shape = var_1742, x = linear_90_cast_fp16)[name = string("op_1743_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_63_cast_fp16 = mul(x = var_1743_cast_fp16, y = const_198_to_fp16)[name = string("q_63_cast_fp16")];
+            tensor<int32, [4]> var_1749 = const()[name = string("op_1749"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1750_cast_fp16 = reshape(shape = var_1749, x = linear_91_cast_fp16)[name = string("op_1750_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_63_cast_fp16 = mul(x = var_1750_cast_fp16, y = const_199_to_fp16)[name = string("k_63_cast_fp16")];
+            tensor<int32, [4]> var_1756 = const()[name = string("op_1756"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1757_cast_fp16 = reshape(shape = var_1756, x = linear_92_cast_fp16)[name = string("op_1757_cast_fp16")];
+            tensor<int32, [4]> var_1758 = const()[name = string("op_1758"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)];
+            bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_126_perm_0 = const()[name = string("transpose_126_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_127_perm_0 = const()[name = string("transpose_127_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_127 = transpose(perm = transpose_127_perm_0, x = k_63_cast_fp16)[name = string("transpose_177")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_126 = transpose(perm = transpose_126_perm_0, x = q_63_cast_fp16)[name = string("transpose_178")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_126, y = transpose_127)[name = string("qk_31_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1762_cast_fp16 = softmax(axis = var_1698, x = qk_31_cast_fp16)[name = string("op_1762_cast_fp16")];
+            bool var_1764_transpose_x_0 = const()[name = string("op_1764_transpose_x_0"), val = bool(false)];
+            bool var_1764_transpose_y_0 = const()[name = string("op_1764_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_63_cast_fp16 = transpose(perm = var_1758, x = var_1757_cast_fp16)[name = string("transpose_179")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1764_cast_fp16 = matmul(transpose_x = var_1764_transpose_x_0, transpose_y = var_1764_transpose_y_0, x = var_1762_cast_fp16, y = v_63_cast_fp16)[name = string("op_1764_cast_fp16")];
+            tensor<int32, [4]> var_1765 = const()[name = string("op_1765"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_15 = const()[name = string("concat_15"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1766_cast_fp16 = transpose(perm = var_1765, x = var_1764_cast_fp16)[name = string("transpose_176")];
+            tensor<fp16, [1, 1500, 1024]> x_191_cast_fp16 = reshape(shape = concat_15, x = var_1766_cast_fp16)[name = string("x_191_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1770_to_fp16 = const()[name = string("op_1770_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394032064)))];
+            tensor<fp16, [1024]> var_1771_to_fp16 = const()[name = string("op_1771_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396129280)))];
+            tensor<fp16, [1, 1500, 1024]> linear_93_cast_fp16 = linear(bias = var_1771_to_fp16, weight = var_1770_to_fp16, x = x_191_cast_fp16)[name = string("linear_93_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_193_cast_fp16 = add(x = x_187_cast_fp16, y = linear_93_cast_fp16)[name = string("x_193_cast_fp16")];
+            tensor<int32, [1]> var_1778_axes_0 = const()[name = string("op_1778_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396131392)))];
+            tensor<fp16, [1024]> blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396133504)))];
+            tensor<fp16, [1, 1500, 1024]> var_1778_cast_fp16 = layer_norm(axes = var_1778_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_1704_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_193_cast_fp16)[name = string("op_1778_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1787_to_fp16 = const()[name = string("op_1787_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396135616)))];
+            tensor<fp16, [4096]> var_1788_to_fp16 = const()[name = string("op_1788_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404524288)))];
+            tensor<fp16, [1, 1500, 4096]> linear_94_cast_fp16 = linear(bias = var_1788_to_fp16, weight = var_1787_to_fp16, x = var_1778_cast_fp16)[name = string("linear_94_cast_fp16")];
+            string x_197_mode_0 = const()[name = string("x_197_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_197_cast_fp16 = gelu(mode = x_197_mode_0, x = linear_94_cast_fp16)[name = string("x_197_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1793_to_fp16 = const()[name = string("op_1793_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404532544)))];
+            tensor<fp16, [1024]> var_1794_to_fp16 = const()[name = string("op_1794_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412921216)))];
+            tensor<fp16, [1, 1500, 1024]> linear_95_cast_fp16 = linear(bias = var_1794_to_fp16, weight = var_1793_to_fp16, x = x_197_cast_fp16)[name = string("linear_95_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_199_cast_fp16 = add(x = x_193_cast_fp16, y = linear_95_cast_fp16)[name = string("x_199_cast_fp16")];
+            int32 var_1804 = const()[name = string("op_1804"), val = int32(-1)];
+            tensor<int32, [1]> var_1820_axes_0 = const()[name = string("op_1820_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412923328)))];
+            tensor<fp16, [1024]> blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412925440)))];
+            fp16 var_1810_to_fp16 = const()[name = string("op_1810_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_1820_cast_fp16 = layer_norm(axes = var_1820_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_1810_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_199_cast_fp16)[name = string("op_1820_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1831_to_fp16 = const()[name = string("op_1831_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412927552)))];
+            tensor<fp16, [1024]> var_1832_to_fp16 = const()[name = string("op_1832_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415024768)))];
+            tensor<fp16, [1, 1500, 1024]> linear_96_cast_fp16 = linear(bias = var_1832_to_fp16, weight = var_1831_to_fp16, x = var_1820_cast_fp16)[name = string("linear_96_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1835_to_fp16 = const()[name = string("op_1835_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415026880)))];
+            tensor<fp16, [1, 1500, 1024]> linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1835_to_fp16, x = var_1820_cast_fp16)[name = string("linear_97_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1839_to_fp16 = const()[name = string("op_1839_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417124096)))];
+            tensor<fp16, [1024]> var_1840_to_fp16 = const()[name = string("op_1840_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419221312)))];
+            tensor<fp16, [1, 1500, 1024]> linear_98_cast_fp16 = linear(bias = var_1840_to_fp16, weight = var_1839_to_fp16, x = var_1820_cast_fp16)[name = string("linear_98_cast_fp16")];
+            tensor<int32, [4]> var_1848 = const()[name = string("op_1848"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1849_cast_fp16 = reshape(shape = var_1848, x = linear_96_cast_fp16)[name = string("op_1849_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_67_cast_fp16 = mul(x = var_1849_cast_fp16, y = const_200_to_fp16)[name = string("q_67_cast_fp16")];
+            tensor<int32, [4]> var_1855 = const()[name = string("op_1855"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1856_cast_fp16 = reshape(shape = var_1855, x = linear_97_cast_fp16)[name = string("op_1856_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_67_cast_fp16 = mul(x = var_1856_cast_fp16, y = const_201_to_fp16)[name = string("k_67_cast_fp16")];
+            tensor<int32, [4]> var_1862 = const()[name = string("op_1862"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1863_cast_fp16 = reshape(shape = var_1862, x = linear_98_cast_fp16)[name = string("op_1863_cast_fp16")];
+            tensor<int32, [4]> var_1864 = const()[name = string("op_1864"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_33_transpose_x_0 = const()[name = string("qk_33_transpose_x_0"), val = bool(false)];
+            bool qk_33_transpose_y_0 = const()[name = string("qk_33_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_128_perm_0 = const()[name = string("transpose_128_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_129_perm_0 = const()[name = string("transpose_129_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_129 = transpose(perm = transpose_129_perm_0, x = k_67_cast_fp16)[name = string("transpose_173")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_128 = transpose(perm = transpose_128_perm_0, x = q_67_cast_fp16)[name = string("transpose_174")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_33_cast_fp16 = matmul(transpose_x = qk_33_transpose_x_0, transpose_y = qk_33_transpose_y_0, x = transpose_128, y = transpose_129)[name = string("qk_33_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1868_cast_fp16 = softmax(axis = var_1804, x = qk_33_cast_fp16)[name = string("op_1868_cast_fp16")];
+            bool var_1870_transpose_x_0 = const()[name = string("op_1870_transpose_x_0"), val = bool(false)];
+            bool var_1870_transpose_y_0 = const()[name = string("op_1870_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_67_cast_fp16 = transpose(perm = var_1864, x = var_1863_cast_fp16)[name = string("transpose_175")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1870_cast_fp16 = matmul(transpose_x = var_1870_transpose_x_0, transpose_y = var_1870_transpose_y_0, x = var_1868_cast_fp16, y = v_67_cast_fp16)[name = string("op_1870_cast_fp16")];
+            tensor<int32, [4]> var_1871 = const()[name = string("op_1871"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_16 = const()[name = string("concat_16"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1872_cast_fp16 = transpose(perm = var_1871, x = var_1870_cast_fp16)[name = string("transpose_172")];
+            tensor<fp16, [1, 1500, 1024]> x_203_cast_fp16 = reshape(shape = concat_16, x = var_1872_cast_fp16)[name = string("x_203_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1876_to_fp16 = const()[name = string("op_1876_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419223424)))];
+            tensor<fp16, [1024]> var_1877_to_fp16 = const()[name = string("op_1877_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421320640)))];
+            tensor<fp16, [1, 1500, 1024]> linear_99_cast_fp16 = linear(bias = var_1877_to_fp16, weight = var_1876_to_fp16, x = x_203_cast_fp16)[name = string("linear_99_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_205_cast_fp16 = add(x = x_199_cast_fp16, y = linear_99_cast_fp16)[name = string("x_205_cast_fp16")];
+            tensor<int32, [1]> var_1884_axes_0 = const()[name = string("op_1884_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421322752)))];
+            tensor<fp16, [1024]> blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421324864)))];
+            tensor<fp16, [1, 1500, 1024]> var_1884_cast_fp16 = layer_norm(axes = var_1884_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_1810_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_205_cast_fp16)[name = string("op_1884_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1893_to_fp16 = const()[name = string("op_1893_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421326976)))];
+            tensor<fp16, [4096]> var_1894_to_fp16 = const()[name = string("op_1894_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429715648)))];
+            tensor<fp16, [1, 1500, 4096]> linear_100_cast_fp16 = linear(bias = var_1894_to_fp16, weight = var_1893_to_fp16, x = var_1884_cast_fp16)[name = string("linear_100_cast_fp16")];
+            string x_209_mode_0 = const()[name = string("x_209_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_209_cast_fp16 = gelu(mode = x_209_mode_0, x = linear_100_cast_fp16)[name = string("x_209_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_1899_to_fp16 = const()[name = string("op_1899_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429723904)))];
+            tensor<fp16, [1024]> var_1900_to_fp16 = const()[name = string("op_1900_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438112576)))];
+            tensor<fp16, [1, 1500, 1024]> linear_101_cast_fp16 = linear(bias = var_1900_to_fp16, weight = var_1899_to_fp16, x = x_209_cast_fp16)[name = string("linear_101_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_211_cast_fp16 = add(x = x_205_cast_fp16, y = linear_101_cast_fp16)[name = string("x_211_cast_fp16")];
+            int32 var_1910 = const()[name = string("op_1910"), val = int32(-1)];
+            tensor<int32, [1]> var_1926_axes_0 = const()[name = string("op_1926_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438114688)))];
+            tensor<fp16, [1024]> blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438116800)))];
+            fp16 var_1916_to_fp16 = const()[name = string("op_1916_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_1926_cast_fp16 = layer_norm(axes = var_1926_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_1916_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_211_cast_fp16)[name = string("op_1926_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1937_to_fp16 = const()[name = string("op_1937_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438118912)))];
+            tensor<fp16, [1024]> var_1938_to_fp16 = const()[name = string("op_1938_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440216128)))];
+            tensor<fp16, [1, 1500, 1024]> linear_102_cast_fp16 = linear(bias = var_1938_to_fp16, weight = var_1937_to_fp16, x = var_1926_cast_fp16)[name = string("linear_102_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1941_to_fp16 = const()[name = string("op_1941_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440218240)))];
+            tensor<fp16, [1, 1500, 1024]> linear_103_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1941_to_fp16, x = var_1926_cast_fp16)[name = string("linear_103_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1945_to_fp16 = const()[name = string("op_1945_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442315456)))];
+            tensor<fp16, [1024]> var_1946_to_fp16 = const()[name = string("op_1946_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444412672)))];
+            tensor<fp16, [1, 1500, 1024]> linear_104_cast_fp16 = linear(bias = var_1946_to_fp16, weight = var_1945_to_fp16, x = var_1926_cast_fp16)[name = string("linear_104_cast_fp16")];
+            tensor<int32, [4]> var_1954 = const()[name = string("op_1954"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1955_cast_fp16 = reshape(shape = var_1954, x = linear_102_cast_fp16)[name = string("op_1955_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_71_cast_fp16 = mul(x = var_1955_cast_fp16, y = const_202_to_fp16)[name = string("q_71_cast_fp16")];
+            tensor<int32, [4]> var_1961 = const()[name = string("op_1961"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1962_cast_fp16 = reshape(shape = var_1961, x = linear_103_cast_fp16)[name = string("op_1962_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_71_cast_fp16 = mul(x = var_1962_cast_fp16, y = const_203_to_fp16)[name = string("k_71_cast_fp16")];
+            tensor<int32, [4]> var_1968 = const()[name = string("op_1968"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1969_cast_fp16 = reshape(shape = var_1968, x = linear_104_cast_fp16)[name = string("op_1969_cast_fp16")];
+            tensor<int32, [4]> var_1970 = const()[name = string("op_1970"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)];
+            bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_130_perm_0 = const()[name = string("transpose_130_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_131_perm_0 = const()[name = string("transpose_131_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_131 = transpose(perm = transpose_131_perm_0, x = k_71_cast_fp16)[name = string("transpose_169")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_130 = transpose(perm = transpose_130_perm_0, x = q_71_cast_fp16)[name = string("transpose_170")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_130, y = transpose_131)[name = string("qk_35_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_1974_cast_fp16 = softmax(axis = var_1910, x = qk_35_cast_fp16)[name = string("op_1974_cast_fp16")];
+            bool var_1976_transpose_x_0 = const()[name = string("op_1976_transpose_x_0"), val = bool(false)];
+            bool var_1976_transpose_y_0 = const()[name = string("op_1976_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_71_cast_fp16 = transpose(perm = var_1970, x = var_1969_cast_fp16)[name = string("transpose_171")];
+            tensor<fp16, [1, 16, 1500, 64]> var_1976_cast_fp16 = matmul(transpose_x = var_1976_transpose_x_0, transpose_y = var_1976_transpose_y_0, x = var_1974_cast_fp16, y = v_71_cast_fp16)[name = string("op_1976_cast_fp16")];
+            tensor<int32, [4]> var_1977 = const()[name = string("op_1977"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_1978_cast_fp16 = transpose(perm = var_1977, x = var_1976_cast_fp16)[name = string("transpose_168")];
+            tensor<fp16, [1, 1500, 1024]> x_215_cast_fp16 = reshape(shape = concat_17, x = var_1978_cast_fp16)[name = string("x_215_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_1982_to_fp16 = const()[name = string("op_1982_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444414784)))];
+            tensor<fp16, [1024]> var_1983_to_fp16 = const()[name = string("op_1983_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446512000)))];
+            tensor<fp16, [1, 1500, 1024]> linear_105_cast_fp16 = linear(bias = var_1983_to_fp16, weight = var_1982_to_fp16, x = x_215_cast_fp16)[name = string("linear_105_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_217_cast_fp16 = add(x = x_211_cast_fp16, y = linear_105_cast_fp16)[name = string("x_217_cast_fp16")];
+            tensor<int32, [1]> var_1990_axes_0 = const()[name = string("op_1990_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446514112)))];
+            tensor<fp16, [1024]> blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446516224)))];
+            tensor<fp16, [1, 1500, 1024]> var_1990_cast_fp16 = layer_norm(axes = var_1990_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_1916_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_217_cast_fp16)[name = string("op_1990_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446518336)))];
+            tensor<fp16, [4096]> var_2000_to_fp16 = const()[name = string("op_2000_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454907008)))];
+            tensor<fp16, [1, 1500, 4096]> linear_106_cast_fp16 = linear(bias = var_2000_to_fp16, weight = var_1999_to_fp16, x = var_1990_cast_fp16)[name = string("linear_106_cast_fp16")];
+            string x_221_mode_0 = const()[name = string("x_221_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_221_cast_fp16 = gelu(mode = x_221_mode_0, x = linear_106_cast_fp16)[name = string("x_221_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2005_to_fp16 = const()[name = string("op_2005_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454915264)))];
+            tensor<fp16, [1024]> var_2006_to_fp16 = const()[name = string("op_2006_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463303936)))];
+            tensor<fp16, [1, 1500, 1024]> linear_107_cast_fp16 = linear(bias = var_2006_to_fp16, weight = var_2005_to_fp16, x = x_221_cast_fp16)[name = string("linear_107_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_223_cast_fp16 = add(x = x_217_cast_fp16, y = linear_107_cast_fp16)[name = string("x_223_cast_fp16")];
+            int32 var_2016 = const()[name = string("op_2016"), val = int32(-1)];
+            tensor<int32, [1]> var_2032_axes_0 = const()[name = string("op_2032_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463306048)))];
+            tensor<fp16, [1024]> blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463308160)))];
+            fp16 var_2022_to_fp16 = const()[name = string("op_2022_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_2032_cast_fp16 = layer_norm(axes = var_2032_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_2022_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_223_cast_fp16)[name = string("op_2032_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2043_to_fp16 = const()[name = string("op_2043_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463310272)))];
+            tensor<fp16, [1024]> var_2044_to_fp16 = const()[name = string("op_2044_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465407488)))];
+            tensor<fp16, [1, 1500, 1024]> linear_108_cast_fp16 = linear(bias = var_2044_to_fp16, weight = var_2043_to_fp16, x = var_2032_cast_fp16)[name = string("linear_108_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2047_to_fp16 = const()[name = string("op_2047_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465409600)))];
+            tensor<fp16, [1, 1500, 1024]> linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2047_to_fp16, x = var_2032_cast_fp16)[name = string("linear_109_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2051_to_fp16 = const()[name = string("op_2051_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467506816)))];
+            tensor<fp16, [1024]> var_2052_to_fp16 = const()[name = string("op_2052_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469604032)))];
+            tensor<fp16, [1, 1500, 1024]> linear_110_cast_fp16 = linear(bias = var_2052_to_fp16, weight = var_2051_to_fp16, x = var_2032_cast_fp16)[name = string("linear_110_cast_fp16")];
+            tensor<int32, [4]> var_2060 = const()[name = string("op_2060"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2061_cast_fp16 = reshape(shape = var_2060, x = linear_108_cast_fp16)[name = string("op_2061_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_75_cast_fp16 = mul(x = var_2061_cast_fp16, y = const_204_to_fp16)[name = string("q_75_cast_fp16")];
+            tensor<int32, [4]> var_2067 = const()[name = string("op_2067"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2068_cast_fp16 = reshape(shape = var_2067, x = linear_109_cast_fp16)[name = string("op_2068_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_75_cast_fp16 = mul(x = var_2068_cast_fp16, y = const_205_to_fp16)[name = string("k_75_cast_fp16")];
+            tensor<int32, [4]> var_2074 = const()[name = string("op_2074"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2075_cast_fp16 = reshape(shape = var_2074, x = linear_110_cast_fp16)[name = string("op_2075_cast_fp16")];
+            tensor<int32, [4]> var_2076 = const()[name = string("op_2076"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)];
+            bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_132_perm_0 = const()[name = string("transpose_132_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_133_perm_0 = const()[name = string("transpose_133_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_133 = transpose(perm = transpose_133_perm_0, x = k_75_cast_fp16)[name = string("transpose_165")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_132 = transpose(perm = transpose_132_perm_0, x = q_75_cast_fp16)[name = string("transpose_166")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_132, y = transpose_133)[name = string("qk_37_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_2080_cast_fp16 = softmax(axis = var_2016, x = qk_37_cast_fp16)[name = string("op_2080_cast_fp16")];
+            bool var_2082_transpose_x_0 = const()[name = string("op_2082_transpose_x_0"), val = bool(false)];
+            bool var_2082_transpose_y_0 = const()[name = string("op_2082_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_75_cast_fp16 = transpose(perm = var_2076, x = var_2075_cast_fp16)[name = string("transpose_167")];
+            tensor<fp16, [1, 16, 1500, 64]> var_2082_cast_fp16 = matmul(transpose_x = var_2082_transpose_x_0, transpose_y = var_2082_transpose_y_0, x = var_2080_cast_fp16, y = v_75_cast_fp16)[name = string("op_2082_cast_fp16")];
+            tensor<int32, [4]> var_2083 = const()[name = string("op_2083"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2084_cast_fp16 = transpose(perm = var_2083, x = var_2082_cast_fp16)[name = string("transpose_164")];
+            tensor<fp16, [1, 1500, 1024]> x_227_cast_fp16 = reshape(shape = concat_18, x = var_2084_cast_fp16)[name = string("x_227_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2088_to_fp16 = const()[name = string("op_2088_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469606144)))];
+            tensor<fp16, [1024]> var_2089_to_fp16 = const()[name = string("op_2089_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471703360)))];
+            tensor<fp16, [1, 1500, 1024]> linear_111_cast_fp16 = linear(bias = var_2089_to_fp16, weight = var_2088_to_fp16, x = x_227_cast_fp16)[name = string("linear_111_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_229_cast_fp16 = add(x = x_223_cast_fp16, y = linear_111_cast_fp16)[name = string("x_229_cast_fp16")];
+            tensor<int32, [1]> var_2096_axes_0 = const()[name = string("op_2096_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471705472)))];
+            tensor<fp16, [1024]> blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471707584)))];
+            tensor<fp16, [1, 1500, 1024]> var_2096_cast_fp16 = layer_norm(axes = var_2096_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_2022_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_229_cast_fp16)[name = string("op_2096_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2105_to_fp16 = const()[name = string("op_2105_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471709696)))];
+            tensor<fp16, [4096]> var_2106_to_fp16 = const()[name = string("op_2106_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480098368)))];
+            tensor<fp16, [1, 1500, 4096]> linear_112_cast_fp16 = linear(bias = var_2106_to_fp16, weight = var_2105_to_fp16, x = var_2096_cast_fp16)[name = string("linear_112_cast_fp16")];
+            string x_233_mode_0 = const()[name = string("x_233_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_233_cast_fp16 = gelu(mode = x_233_mode_0, x = linear_112_cast_fp16)[name = string("x_233_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2111_to_fp16 = const()[name = string("op_2111_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480106624)))];
+            tensor<fp16, [1024]> var_2112_to_fp16 = const()[name = string("op_2112_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488495296)))];
+            tensor<fp16, [1, 1500, 1024]> linear_113_cast_fp16 = linear(bias = var_2112_to_fp16, weight = var_2111_to_fp16, x = x_233_cast_fp16)[name = string("linear_113_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_235_cast_fp16 = add(x = x_229_cast_fp16, y = linear_113_cast_fp16)[name = string("x_235_cast_fp16")];
+            int32 var_2122 = const()[name = string("op_2122"), val = int32(-1)];
+            tensor<int32, [1]> var_2138_axes_0 = const()[name = string("op_2138_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488497408)))];
+            tensor<fp16, [1024]> blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488499520)))];
+            fp16 var_2128_to_fp16 = const()[name = string("op_2128_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_2138_cast_fp16 = layer_norm(axes = var_2138_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_2128_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_235_cast_fp16)[name = string("op_2138_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2149_to_fp16 = const()[name = string("op_2149_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488501632)))];
+            tensor<fp16, [1024]> var_2150_to_fp16 = const()[name = string("op_2150_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490598848)))];
+            tensor<fp16, [1, 1500, 1024]> linear_114_cast_fp16 = linear(bias = var_2150_to_fp16, weight = var_2149_to_fp16, x = var_2138_cast_fp16)[name = string("linear_114_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2153_to_fp16 = const()[name = string("op_2153_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490600960)))];
+            tensor<fp16, [1, 1500, 1024]> linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2153_to_fp16, x = var_2138_cast_fp16)[name = string("linear_115_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2157_to_fp16 = const()[name = string("op_2157_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492698176)))];
+            tensor<fp16, [1024]> var_2158_to_fp16 = const()[name = string("op_2158_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494795392)))];
+            tensor<fp16, [1, 1500, 1024]> linear_116_cast_fp16 = linear(bias = var_2158_to_fp16, weight = var_2157_to_fp16, x = var_2138_cast_fp16)[name = string("linear_116_cast_fp16")];
+            tensor<int32, [4]> var_2166 = const()[name = string("op_2166"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2167_cast_fp16 = reshape(shape = var_2166, x = linear_114_cast_fp16)[name = string("op_2167_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_79_cast_fp16 = mul(x = var_2167_cast_fp16, y = const_206_to_fp16)[name = string("q_79_cast_fp16")];
+            tensor<int32, [4]> var_2173 = const()[name = string("op_2173"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2174_cast_fp16 = reshape(shape = var_2173, x = linear_115_cast_fp16)[name = string("op_2174_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_79_cast_fp16 = mul(x = var_2174_cast_fp16, y = const_207_to_fp16)[name = string("k_79_cast_fp16")];
+            tensor<int32, [4]> var_2180 = const()[name = string("op_2180"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2181_cast_fp16 = reshape(shape = var_2180, x = linear_116_cast_fp16)[name = string("op_2181_cast_fp16")];
+            tensor<int32, [4]> var_2182 = const()[name = string("op_2182"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_39_transpose_x_0 = const()[name = string("qk_39_transpose_x_0"), val = bool(false)];
+            bool qk_39_transpose_y_0 = const()[name = string("qk_39_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_134_perm_0 = const()[name = string("transpose_134_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_135_perm_0 = const()[name = string("transpose_135_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_135 = transpose(perm = transpose_135_perm_0, x = k_79_cast_fp16)[name = string("transpose_161")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_134 = transpose(perm = transpose_134_perm_0, x = q_79_cast_fp16)[name = string("transpose_162")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_39_cast_fp16 = matmul(transpose_x = qk_39_transpose_x_0, transpose_y = qk_39_transpose_y_0, x = transpose_134, y = transpose_135)[name = string("qk_39_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_2186_cast_fp16 = softmax(axis = var_2122, x = qk_39_cast_fp16)[name = string("op_2186_cast_fp16")];
+            bool var_2188_transpose_x_0 = const()[name = string("op_2188_transpose_x_0"), val = bool(false)];
+            bool var_2188_transpose_y_0 = const()[name = string("op_2188_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_79_cast_fp16 = transpose(perm = var_2182, x = var_2181_cast_fp16)[name = string("transpose_163")];
+            tensor<fp16, [1, 16, 1500, 64]> var_2188_cast_fp16 = matmul(transpose_x = var_2188_transpose_x_0, transpose_y = var_2188_transpose_y_0, x = var_2186_cast_fp16, y = v_79_cast_fp16)[name = string("op_2188_cast_fp16")];
+            tensor<int32, [4]> var_2189 = const()[name = string("op_2189"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2190_cast_fp16 = transpose(perm = var_2189, x = var_2188_cast_fp16)[name = string("transpose_160")];
+            tensor<fp16, [1, 1500, 1024]> x_239_cast_fp16 = reshape(shape = concat_19, x = var_2190_cast_fp16)[name = string("x_239_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2194_to_fp16 = const()[name = string("op_2194_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494797504)))];
+            tensor<fp16, [1024]> var_2195_to_fp16 = const()[name = string("op_2195_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496894720)))];
+            tensor<fp16, [1, 1500, 1024]> linear_117_cast_fp16 = linear(bias = var_2195_to_fp16, weight = var_2194_to_fp16, x = x_239_cast_fp16)[name = string("linear_117_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_241_cast_fp16 = add(x = x_235_cast_fp16, y = linear_117_cast_fp16)[name = string("x_241_cast_fp16")];
+            tensor<int32, [1]> var_2202_axes_0 = const()[name = string("op_2202_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496896832)))];
+            tensor<fp16, [1024]> blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496898944)))];
+            tensor<fp16, [1, 1500, 1024]> var_2202_cast_fp16 = layer_norm(axes = var_2202_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_2128_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_241_cast_fp16)[name = string("op_2202_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496901056)))];
+            tensor<fp16, [4096]> var_2212_to_fp16 = const()[name = string("op_2212_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505289728)))];
+            tensor<fp16, [1, 1500, 4096]> linear_118_cast_fp16 = linear(bias = var_2212_to_fp16, weight = var_2211_to_fp16, x = var_2202_cast_fp16)[name = string("linear_118_cast_fp16")];
+            string x_245_mode_0 = const()[name = string("x_245_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_245_cast_fp16 = gelu(mode = x_245_mode_0, x = linear_118_cast_fp16)[name = string("x_245_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2217_to_fp16 = const()[name = string("op_2217_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505297984)))];
+            tensor<fp16, [1024]> var_2218_to_fp16 = const()[name = string("op_2218_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513686656)))];
+            tensor<fp16, [1, 1500, 1024]> linear_119_cast_fp16 = linear(bias = var_2218_to_fp16, weight = var_2217_to_fp16, x = x_245_cast_fp16)[name = string("linear_119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_247_cast_fp16 = add(x = x_241_cast_fp16, y = linear_119_cast_fp16)[name = string("x_247_cast_fp16")];
+            int32 var_2228 = const()[name = string("op_2228"), val = int32(-1)];
+            tensor<int32, [1]> var_2244_axes_0 = const()[name = string("op_2244_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513688768)))];
+            tensor<fp16, [1024]> blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513690880)))];
+            fp16 var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_2244_cast_fp16 = layer_norm(axes = var_2244_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_2234_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_247_cast_fp16)[name = string("op_2244_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2255_to_fp16 = const()[name = string("op_2255_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513692992)))];
+            tensor<fp16, [1024]> var_2256_to_fp16 = const()[name = string("op_2256_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515790208)))];
+            tensor<fp16, [1, 1500, 1024]> linear_120_cast_fp16 = linear(bias = var_2256_to_fp16, weight = var_2255_to_fp16, x = var_2244_cast_fp16)[name = string("linear_120_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2259_to_fp16 = const()[name = string("op_2259_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515792320)))];
+            tensor<fp16, [1, 1500, 1024]> linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2259_to_fp16, x = var_2244_cast_fp16)[name = string("linear_121_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2263_to_fp16 = const()[name = string("op_2263_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517889536)))];
+            tensor<fp16, [1024]> var_2264_to_fp16 = const()[name = string("op_2264_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519986752)))];
+            tensor<fp16, [1, 1500, 1024]> linear_122_cast_fp16 = linear(bias = var_2264_to_fp16, weight = var_2263_to_fp16, x = var_2244_cast_fp16)[name = string("linear_122_cast_fp16")];
+            tensor<int32, [4]> var_2272 = const()[name = string("op_2272"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2273_cast_fp16 = reshape(shape = var_2272, x = linear_120_cast_fp16)[name = string("op_2273_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_83_cast_fp16 = mul(x = var_2273_cast_fp16, y = const_208_to_fp16)[name = string("q_83_cast_fp16")];
+            tensor<int32, [4]> var_2279 = const()[name = string("op_2279"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2280_cast_fp16 = reshape(shape = var_2279, x = linear_121_cast_fp16)[name = string("op_2280_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_83_cast_fp16 = mul(x = var_2280_cast_fp16, y = const_209_to_fp16)[name = string("k_83_cast_fp16")];
+            tensor<int32, [4]> var_2286 = const()[name = string("op_2286"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2287_cast_fp16 = reshape(shape = var_2286, x = linear_122_cast_fp16)[name = string("op_2287_cast_fp16")];
+            tensor<int32, [4]> var_2288 = const()[name = string("op_2288"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)];
+            bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_136_perm_0 = const()[name = string("transpose_136_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_137_perm_0 = const()[name = string("transpose_137_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_137 = transpose(perm = transpose_137_perm_0, x = k_83_cast_fp16)[name = string("transpose_157")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_136 = transpose(perm = transpose_136_perm_0, x = q_83_cast_fp16)[name = string("transpose_158")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_136, y = transpose_137)[name = string("qk_41_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_2292_cast_fp16 = softmax(axis = var_2228, x = qk_41_cast_fp16)[name = string("op_2292_cast_fp16")];
+            bool var_2294_transpose_x_0 = const()[name = string("op_2294_transpose_x_0"), val = bool(false)];
+            bool var_2294_transpose_y_0 = const()[name = string("op_2294_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_83_cast_fp16 = transpose(perm = var_2288, x = var_2287_cast_fp16)[name = string("transpose_159")];
+            tensor<fp16, [1, 16, 1500, 64]> var_2294_cast_fp16 = matmul(transpose_x = var_2294_transpose_x_0, transpose_y = var_2294_transpose_y_0, x = var_2292_cast_fp16, y = v_83_cast_fp16)[name = string("op_2294_cast_fp16")];
+            tensor<int32, [4]> var_2295 = const()[name = string("op_2295"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2296_cast_fp16 = transpose(perm = var_2295, x = var_2294_cast_fp16)[name = string("transpose_156")];
+            tensor<fp16, [1, 1500, 1024]> x_251_cast_fp16 = reshape(shape = concat_20, x = var_2296_cast_fp16)[name = string("x_251_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2300_to_fp16 = const()[name = string("op_2300_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519988864)))];
+            tensor<fp16, [1024]> var_2301_to_fp16 = const()[name = string("op_2301_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522086080)))];
+            tensor<fp16, [1, 1500, 1024]> linear_123_cast_fp16 = linear(bias = var_2301_to_fp16, weight = var_2300_to_fp16, x = x_251_cast_fp16)[name = string("linear_123_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_253_cast_fp16 = add(x = x_247_cast_fp16, y = linear_123_cast_fp16)[name = string("x_253_cast_fp16")];
+            tensor<int32, [1]> var_2308_axes_0 = const()[name = string("op_2308_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522088192)))];
+            tensor<fp16, [1024]> blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522090304)))];
+            tensor<fp16, [1, 1500, 1024]> var_2308_cast_fp16 = layer_norm(axes = var_2308_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_2234_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_253_cast_fp16)[name = string("op_2308_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2317_to_fp16 = const()[name = string("op_2317_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522092416)))];
+            tensor<fp16, [4096]> var_2318_to_fp16 = const()[name = string("op_2318_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530481088)))];
+            tensor<fp16, [1, 1500, 4096]> linear_124_cast_fp16 = linear(bias = var_2318_to_fp16, weight = var_2317_to_fp16, x = var_2308_cast_fp16)[name = string("linear_124_cast_fp16")];
+            string x_257_mode_0 = const()[name = string("x_257_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_257_cast_fp16 = gelu(mode = x_257_mode_0, x = linear_124_cast_fp16)[name = string("x_257_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2323_to_fp16 = const()[name = string("op_2323_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530489344)))];
+            tensor<fp16, [1024]> var_2324_to_fp16 = const()[name = string("op_2324_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538878016)))];
+            tensor<fp16, [1, 1500, 1024]> linear_125_cast_fp16 = linear(bias = var_2324_to_fp16, weight = var_2323_to_fp16, x = x_257_cast_fp16)[name = string("linear_125_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_259_cast_fp16 = add(x = x_253_cast_fp16, y = linear_125_cast_fp16)[name = string("x_259_cast_fp16")];
+            int32 var_2334 = const()[name = string("op_2334"), val = int32(-1)];
+            tensor<int32, [1]> var_2350_axes_0 = const()[name = string("op_2350_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538880128)))];
+            tensor<fp16, [1024]> blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538882240)))];
+            fp16 var_2340_to_fp16 = const()[name = string("op_2340_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_2350_cast_fp16 = layer_norm(axes = var_2350_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_2340_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_259_cast_fp16)[name = string("op_2350_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2361_to_fp16 = const()[name = string("op_2361_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538884352)))];
+            tensor<fp16, [1024]> var_2362_to_fp16 = const()[name = string("op_2362_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540981568)))];
+            tensor<fp16, [1, 1500, 1024]> linear_126_cast_fp16 = linear(bias = var_2362_to_fp16, weight = var_2361_to_fp16, x = var_2350_cast_fp16)[name = string("linear_126_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2365_to_fp16 = const()[name = string("op_2365_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540983680)))];
+            tensor<fp16, [1, 1500, 1024]> linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2365_to_fp16, x = var_2350_cast_fp16)[name = string("linear_127_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2369_to_fp16 = const()[name = string("op_2369_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543080896)))];
+            tensor<fp16, [1024]> var_2370_to_fp16 = const()[name = string("op_2370_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545178112)))];
+            tensor<fp16, [1, 1500, 1024]> linear_128_cast_fp16 = linear(bias = var_2370_to_fp16, weight = var_2369_to_fp16, x = var_2350_cast_fp16)[name = string("linear_128_cast_fp16")];
+            tensor<int32, [4]> var_2378 = const()[name = string("op_2378"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2379_cast_fp16 = reshape(shape = var_2378, x = linear_126_cast_fp16)[name = string("op_2379_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_87_cast_fp16 = mul(x = var_2379_cast_fp16, y = const_210_to_fp16)[name = string("q_87_cast_fp16")];
+            tensor<int32, [4]> var_2385 = const()[name = string("op_2385"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2386_cast_fp16 = reshape(shape = var_2385, x = linear_127_cast_fp16)[name = string("op_2386_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_87_cast_fp16 = mul(x = var_2386_cast_fp16, y = const_211_to_fp16)[name = string("k_87_cast_fp16")];
+            tensor<int32, [4]> var_2392 = const()[name = string("op_2392"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2393_cast_fp16 = reshape(shape = var_2392, x = linear_128_cast_fp16)[name = string("op_2393_cast_fp16")];
+            tensor<int32, [4]> var_2394 = const()[name = string("op_2394"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)];
+            bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_138_perm_0 = const()[name = string("transpose_138_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_139_perm_0 = const()[name = string("transpose_139_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_139 = transpose(perm = transpose_139_perm_0, x = k_87_cast_fp16)[name = string("transpose_153")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_138 = transpose(perm = transpose_138_perm_0, x = q_87_cast_fp16)[name = string("transpose_154")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_138, y = transpose_139)[name = string("qk_43_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_2398_cast_fp16 = softmax(axis = var_2334, x = qk_43_cast_fp16)[name = string("op_2398_cast_fp16")];
+            bool var_2400_transpose_x_0 = const()[name = string("op_2400_transpose_x_0"), val = bool(false)];
+            bool var_2400_transpose_y_0 = const()[name = string("op_2400_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_87_cast_fp16 = transpose(perm = var_2394, x = var_2393_cast_fp16)[name = string("transpose_155")];
+            tensor<fp16, [1, 16, 1500, 64]> var_2400_cast_fp16 = matmul(transpose_x = var_2400_transpose_x_0, transpose_y = var_2400_transpose_y_0, x = var_2398_cast_fp16, y = v_87_cast_fp16)[name = string("op_2400_cast_fp16")];
+            tensor<int32, [4]> var_2401 = const()[name = string("op_2401"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2402_cast_fp16 = transpose(perm = var_2401, x = var_2400_cast_fp16)[name = string("transpose_152")];
+            tensor<fp16, [1, 1500, 1024]> x_263_cast_fp16 = reshape(shape = concat_21, x = var_2402_cast_fp16)[name = string("x_263_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2406_to_fp16 = const()[name = string("op_2406_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545180224)))];
+            tensor<fp16, [1024]> var_2407_to_fp16 = const()[name = string("op_2407_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547277440)))];
+            tensor<fp16, [1, 1500, 1024]> linear_129_cast_fp16 = linear(bias = var_2407_to_fp16, weight = var_2406_to_fp16, x = x_263_cast_fp16)[name = string("linear_129_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_265_cast_fp16 = add(x = x_259_cast_fp16, y = linear_129_cast_fp16)[name = string("x_265_cast_fp16")];
+            tensor<int32, [1]> var_2414_axes_0 = const()[name = string("op_2414_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547279552)))];
+            tensor<fp16, [1024]> blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547281664)))];
+            tensor<fp16, [1, 1500, 1024]> var_2414_cast_fp16 = layer_norm(axes = var_2414_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_2340_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_265_cast_fp16)[name = string("op_2414_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2423_to_fp16 = const()[name = string("op_2423_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547283776)))];
+            tensor<fp16, [4096]> var_2424_to_fp16 = const()[name = string("op_2424_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555672448)))];
+            tensor<fp16, [1, 1500, 4096]> linear_130_cast_fp16 = linear(bias = var_2424_to_fp16, weight = var_2423_to_fp16, x = var_2414_cast_fp16)[name = string("linear_130_cast_fp16")];
+            string x_269_mode_0 = const()[name = string("x_269_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_269_cast_fp16 = gelu(mode = x_269_mode_0, x = linear_130_cast_fp16)[name = string("x_269_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2429_to_fp16 = const()[name = string("op_2429_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555680704)))];
+            tensor<fp16, [1024]> var_2430_to_fp16 = const()[name = string("op_2430_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564069376)))];
+            tensor<fp16, [1, 1500, 1024]> linear_131_cast_fp16 = linear(bias = var_2430_to_fp16, weight = var_2429_to_fp16, x = x_269_cast_fp16)[name = string("linear_131_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_271_cast_fp16 = add(x = x_265_cast_fp16, y = linear_131_cast_fp16)[name = string("x_271_cast_fp16")];
+            int32 var_2440 = const()[name = string("op_2440"), val = int32(-1)];
+            tensor<int32, [1]> var_2456_axes_0 = const()[name = string("op_2456_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564071488)))];
+            tensor<fp16, [1024]> blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564073600)))];
+            fp16 var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_2456_cast_fp16 = layer_norm(axes = var_2456_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_2446_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_271_cast_fp16)[name = string("op_2456_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2467_to_fp16 = const()[name = string("op_2467_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564075712)))];
+            tensor<fp16, [1024]> var_2468_to_fp16 = const()[name = string("op_2468_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566172928)))];
+            tensor<fp16, [1, 1500, 1024]> linear_132_cast_fp16 = linear(bias = var_2468_to_fp16, weight = var_2467_to_fp16, x = var_2456_cast_fp16)[name = string("linear_132_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2471_to_fp16 = const()[name = string("op_2471_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566175040)))];
+            tensor<fp16, [1, 1500, 1024]> linear_133_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2471_to_fp16, x = var_2456_cast_fp16)[name = string("linear_133_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2475_to_fp16 = const()[name = string("op_2475_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568272256)))];
+            tensor<fp16, [1024]> var_2476_to_fp16 = const()[name = string("op_2476_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(570369472)))];
+            tensor<fp16, [1, 1500, 1024]> linear_134_cast_fp16 = linear(bias = var_2476_to_fp16, weight = var_2475_to_fp16, x = var_2456_cast_fp16)[name = string("linear_134_cast_fp16")];
+            tensor<int32, [4]> var_2484 = const()[name = string("op_2484"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2485_cast_fp16 = reshape(shape = var_2484, x = linear_132_cast_fp16)[name = string("op_2485_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_91_cast_fp16 = mul(x = var_2485_cast_fp16, y = const_212_to_fp16)[name = string("q_91_cast_fp16")];
+            tensor<int32, [4]> var_2491 = const()[name = string("op_2491"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2492_cast_fp16 = reshape(shape = var_2491, x = linear_133_cast_fp16)[name = string("op_2492_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_91_cast_fp16 = mul(x = var_2492_cast_fp16, y = const_213_to_fp16)[name = string("k_91_cast_fp16")];
+            tensor<int32, [4]> var_2498 = const()[name = string("op_2498"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2499_cast_fp16 = reshape(shape = var_2498, x = linear_134_cast_fp16)[name = string("op_2499_cast_fp16")];
+            tensor<int32, [4]> var_2500 = const()[name = string("op_2500"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_45_transpose_x_0 = const()[name = string("qk_45_transpose_x_0"), val = bool(false)];
+            bool qk_45_transpose_y_0 = const()[name = string("qk_45_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_140_perm_0 = const()[name = string("transpose_140_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_141_perm_0 = const()[name = string("transpose_141_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_141 = transpose(perm = transpose_141_perm_0, x = k_91_cast_fp16)[name = string("transpose_149")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_140 = transpose(perm = transpose_140_perm_0, x = q_91_cast_fp16)[name = string("transpose_150")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_45_cast_fp16 = matmul(transpose_x = qk_45_transpose_x_0, transpose_y = qk_45_transpose_y_0, x = transpose_140, y = transpose_141)[name = string("qk_45_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_2504_cast_fp16 = softmax(axis = var_2440, x = qk_45_cast_fp16)[name = string("op_2504_cast_fp16")];
+            bool var_2506_transpose_x_0 = const()[name = string("op_2506_transpose_x_0"), val = bool(false)];
+            bool var_2506_transpose_y_0 = const()[name = string("op_2506_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_91_cast_fp16 = transpose(perm = var_2500, x = var_2499_cast_fp16)[name = string("transpose_151")];
+            tensor<fp16, [1, 16, 1500, 64]> var_2506_cast_fp16 = matmul(transpose_x = var_2506_transpose_x_0, transpose_y = var_2506_transpose_y_0, x = var_2504_cast_fp16, y = v_91_cast_fp16)[name = string("op_2506_cast_fp16")];
+            tensor<int32, [4]> var_2507 = const()[name = string("op_2507"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_22 = const()[name = string("concat_22"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2508_cast_fp16 = transpose(perm = var_2507, x = var_2506_cast_fp16)[name = string("transpose_148")];
+            tensor<fp16, [1, 1500, 1024]> x_275_cast_fp16 = reshape(shape = concat_22, x = var_2508_cast_fp16)[name = string("x_275_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2512_to_fp16 = const()[name = string("op_2512_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(570371584)))];
+            tensor<fp16, [1024]> var_2513_to_fp16 = const()[name = string("op_2513_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572468800)))];
+            tensor<fp16, [1, 1500, 1024]> linear_135_cast_fp16 = linear(bias = var_2513_to_fp16, weight = var_2512_to_fp16, x = x_275_cast_fp16)[name = string("linear_135_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_277_cast_fp16 = add(x = x_271_cast_fp16, y = linear_135_cast_fp16)[name = string("x_277_cast_fp16")];
+            tensor<int32, [1]> var_2520_axes_0 = const()[name = string("op_2520_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572470912)))];
+            tensor<fp16, [1024]> blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572473024)))];
+            tensor<fp16, [1, 1500, 1024]> var_2520_cast_fp16 = layer_norm(axes = var_2520_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_2446_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_277_cast_fp16)[name = string("op_2520_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2529_to_fp16 = const()[name = string("op_2529_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572475136)))];
+            tensor<fp16, [4096]> var_2530_to_fp16 = const()[name = string("op_2530_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580863808)))];
+            tensor<fp16, [1, 1500, 4096]> linear_136_cast_fp16 = linear(bias = var_2530_to_fp16, weight = var_2529_to_fp16, x = var_2520_cast_fp16)[name = string("linear_136_cast_fp16")];
+            string x_281_mode_0 = const()[name = string("x_281_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_281_cast_fp16 = gelu(mode = x_281_mode_0, x = linear_136_cast_fp16)[name = string("x_281_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2535_to_fp16 = const()[name = string("op_2535_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580872064)))];
+            tensor<fp16, [1024]> var_2536_to_fp16 = const()[name = string("op_2536_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589260736)))];
+            tensor<fp16, [1, 1500, 1024]> linear_137_cast_fp16 = linear(bias = var_2536_to_fp16, weight = var_2535_to_fp16, x = x_281_cast_fp16)[name = string("linear_137_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_283_cast_fp16 = add(x = x_277_cast_fp16, y = linear_137_cast_fp16)[name = string("x_283_cast_fp16")];
+            int32 var_2546 = const()[name = string("op_2546"), val = int32(-1)];
+            tensor<int32, [1]> var_2562_axes_0 = const()[name = string("op_2562_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589262848)))];
+            tensor<fp16, [1024]> blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589264960)))];
+            fp16 var_2552_to_fp16 = const()[name = string("op_2552_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> var_2562_cast_fp16 = layer_norm(axes = var_2562_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_2552_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_283_cast_fp16)[name = string("op_2562_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2573_to_fp16 = const()[name = string("op_2573_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589267072)))];
+            tensor<fp16, [1024]> var_2574_to_fp16 = const()[name = string("op_2574_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591364288)))];
+            tensor<fp16, [1, 1500, 1024]> linear_138_cast_fp16 = linear(bias = var_2574_to_fp16, weight = var_2573_to_fp16, x = var_2562_cast_fp16)[name = string("linear_138_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2577_to_fp16 = const()[name = string("op_2577_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591366400)))];
+            tensor<fp16, [1, 1500, 1024]> linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2577_to_fp16, x = var_2562_cast_fp16)[name = string("linear_139_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2581_to_fp16 = const()[name = string("op_2581_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593463616)))];
+            tensor<fp16, [1024]> var_2582_to_fp16 = const()[name = string("op_2582_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595560832)))];
+            tensor<fp16, [1, 1500, 1024]> linear_140_cast_fp16 = linear(bias = var_2582_to_fp16, weight = var_2581_to_fp16, x = var_2562_cast_fp16)[name = string("linear_140_cast_fp16")];
+            tensor<int32, [4]> var_2590 = const()[name = string("op_2590"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2591_cast_fp16 = reshape(shape = var_2590, x = linear_138_cast_fp16)[name = string("op_2591_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> q_cast_fp16 = mul(x = var_2591_cast_fp16, y = const_214_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_2597 = const()[name = string("op_2597"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2598_cast_fp16 = reshape(shape = var_2597, x = linear_139_cast_fp16)[name = string("op_2598_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 16, 64]> k_cast_fp16 = mul(x = var_2598_cast_fp16, y = const_215_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_2604 = const()[name = string("op_2604"), val = tensor<int32, [4]>([1, 1500, 16, -1])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2605_cast_fp16 = reshape(shape = var_2604, x = linear_140_cast_fp16)[name = string("op_2605_cast_fp16")];
+            tensor<int32, [4]> var_2606 = const()[name = string("op_2606"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_142_perm_0 = const()[name = string("transpose_142_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_143_perm_0 = const()[name = string("transpose_143_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 16, 64, 1500]> transpose_143 = transpose(perm = transpose_143_perm_0, x = k_cast_fp16)[name = string("transpose_145")];
+            tensor<fp16, [1, 16, 1500, 64]> transpose_142 = transpose(perm = transpose_142_perm_0, x = q_cast_fp16)[name = string("transpose_146")];
+            tensor<fp16, [1, 16, 1500, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_142, y = transpose_143)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 16, 1500, 1500]> var_2610_cast_fp16 = softmax(axis = var_2546, x = qk_cast_fp16)[name = string("op_2610_cast_fp16")];
+            bool var_2612_transpose_x_0 = const()[name = string("op_2612_transpose_x_0"), val = bool(false)];
+            bool var_2612_transpose_y_0 = const()[name = string("op_2612_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 16, 1500, 64]> v_cast_fp16 = transpose(perm = var_2606, x = var_2605_cast_fp16)[name = string("transpose_147")];
+            tensor<fp16, [1, 16, 1500, 64]> var_2612_cast_fp16 = matmul(transpose_x = var_2612_transpose_x_0, transpose_y = var_2612_transpose_y_0, x = var_2610_cast_fp16, y = v_cast_fp16)[name = string("op_2612_cast_fp16")];
+            tensor<int32, [4]> var_2613 = const()[name = string("op_2613"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [3]>([1, 1500, 1024])];
+            tensor<fp16, [1, 1500, 16, 64]> var_2614_cast_fp16 = transpose(perm = var_2613, x = var_2612_cast_fp16)[name = string("transpose_144")];
+            tensor<fp16, [1, 1500, 1024]> x_287_cast_fp16 = reshape(shape = concat_23, x = var_2614_cast_fp16)[name = string("x_287_cast_fp16")];
+            tensor<fp16, [1024, 1024]> var_2618_to_fp16 = const()[name = string("op_2618_to_fp16"), val = tensor<fp16, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595562944)))];
+            tensor<fp16, [1024]> var_2619_to_fp16 = const()[name = string("op_2619_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597660160)))];
+            tensor<fp16, [1, 1500, 1024]> linear_141_cast_fp16 = linear(bias = var_2619_to_fp16, weight = var_2618_to_fp16, x = x_287_cast_fp16)[name = string("linear_141_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_289_cast_fp16 = add(x = x_283_cast_fp16, y = linear_141_cast_fp16)[name = string("x_289_cast_fp16")];
+            tensor<int32, [1]> var_2626_axes_0 = const()[name = string("op_2626_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597662272)))];
+            tensor<fp16, [1024]> blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597664384)))];
+            tensor<fp16, [1, 1500, 1024]> var_2626_cast_fp16 = layer_norm(axes = var_2626_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_2552_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_289_cast_fp16)[name = string("op_2626_cast_fp16")];
+            tensor<fp16, [4096, 1024]> var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = tensor<fp16, [4096, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597666496)))];
+            tensor<fp16, [4096]> var_2636_to_fp16 = const()[name = string("op_2636_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606055168)))];
+            tensor<fp16, [1, 1500, 4096]> linear_142_cast_fp16 = linear(bias = var_2636_to_fp16, weight = var_2635_to_fp16, x = var_2626_cast_fp16)[name = string("linear_142_cast_fp16")];
+            string x_293_mode_0 = const()[name = string("x_293_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 4096]> x_293_cast_fp16 = gelu(mode = x_293_mode_0, x = linear_142_cast_fp16)[name = string("x_293_cast_fp16")];
+            tensor<fp16, [1024, 4096]> var_2641_to_fp16 = const()[name = string("op_2641_to_fp16"), val = tensor<fp16, [1024, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606063424)))];
+            tensor<fp16, [1024]> var_2642_to_fp16 = const()[name = string("op_2642_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614452096)))];
+            tensor<fp16, [1, 1500, 1024]> linear_143_cast_fp16 = linear(bias = var_2642_to_fp16, weight = var_2641_to_fp16, x = x_293_cast_fp16)[name = string("linear_143_cast_fp16")];
+            tensor<fp16, [1, 1500, 1024]> x_cast_fp16 = add(x = x_289_cast_fp16, y = linear_143_cast_fp16)[name = string("x_cast_fp16")];
+            tensor<int32, [1]> var_2655_axes_0 = const()[name = string("op_2655_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024]> ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614454208)))];
+            tensor<fp16, [1024]> ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614456320)))];
+            fp16 var_2646_to_fp16 = const()[name = string("op_2646_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 1024]> output = layer_norm(axes = var_2655_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_2646_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_2655_cast_fp16")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/medium/encoder.mlmodelc/weights/weight.bin b/medium/encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1866ade87664eb3f1788b808651fba6831f3740c
--- /dev/null
+++ b/medium/encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cd1baef4c7d8260ea817ea56705b3700155c01e8d3ea4bc8e364a8674a88d15
+size 614458432
diff --git a/medium/model_dims.json b/medium/model_dims.json
new file mode 100644
index 0000000000000000000000000000000000000000..477e24aaa9c13c6726a8df61f16bd82f1405be55
--- /dev/null
+++ b/medium/model_dims.json
@@ -0,0 +1,12 @@
+{
+  "n_mels": 80,
+  "n_audio_ctx": 1500,
+  "n_audio_state": 1024,
+  "n_audio_head": 16,
+  "n_audio_layer": 24,
+  "n_vocab": 51865,
+  "n_text_ctx": 448,
+  "n_text_state": 1024,
+  "n_text_head": 16,
+  "n_text_layer": 24
+}
\ No newline at end of file
diff --git a/small/decoder_first.mlmodelc/analytics/coremldata.bin b/small/decoder_first.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0032999b4489c4f9d5bd9515f717cc6e1c9fe736
--- /dev/null
+++ b/small/decoder_first.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f646d6c44560c36d629943c0c46fda6c8a900954a25d081de6ca16e2e45d48cd
+size 243
diff --git a/small/decoder_first.mlmodelc/coremldata.bin b/small/decoder_first.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..80bf6c99e245a2440709d8dbd5a28cb5341b2e42
--- /dev/null
+++ b/small/decoder_first.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a62f546da9814e8ccdc00a5ab41a7f8198d82970eb5fedb956aa58759ef3609
+size 453
diff --git a/small/decoder_first.mlmodelc/metadata.json b/small/decoder_first.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..999dc10290ff4c233fecaf090537792a279d768f
--- /dev/null
+++ b/small/decoder_first.mlmodelc/metadata.json
@@ -0,0 +1,106 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "dummy",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.writeState" : 26,
+      "Shape" : 24,
+      "Ios18.linear" : 24,
+      "Identity" : 1,
+      "Ios18.gather" : 24,
+      "Ios18.concat" : 24,
+      "Ios18.sliceUpdate" : 26,
+      "Ios18.cast" : 48,
+      "Ios18.expandDims" : 24,
+      "Ios18.readState" : 26
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 1 × 448 × 768)",
+        "shortDescription" : "",
+        "shape" : "[12, 1, 448, 768]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 1 × 448 × 768)",
+        "shortDescription" : "",
+        "shape" : "[12, 1, 448, 768]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 1 × 1500 × 768)",
+        "shortDescription" : "",
+        "shape" : "[12, 1, 1500, 768]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 1 × 1500 × 768)",
+        "shortDescription" : "",
+        "shape" : "[12, 1, 1500, 768]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...1500 × 768",
+        "shapeRange" : "[[1, 1], [1, 1500], [768, 768]]",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 768)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1, 768]",
+        "name" : "audio_data",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_first",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/small/decoder_first.mlmodelc/model.mil b/small/decoder_first.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..a28886a25fef705861bad1d346a593543141b415
--- /dev/null
+++ b/small/decoder_first.mlmodelc/model.mil
@@ -0,0 +1,711 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, ?, 768]> audio_data, state<tensor<fp16, [12, 1, 448, 768]>> k_cache1, state<tensor<fp16, [12, 1, 1500, 768]>> k_cache2, state<tensor<fp16, [12, 1, 448, 768]>> v_cache1, state<tensor<fp16, [12, 1, 1500, 768]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 768]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [768, 768]]}})))] {
+            tensor<fp16, [1, ?, 768]> dummy = identity(x = audio_data)[name = string("identity_0")];
+            tensor<fp16, [12, 1, 448, 768]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor<fp16, [12, 1, 448, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_26_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_27_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<fp16, [12, 1, 1500, 768]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<fp16, [768, 768]> var_91_to_fp16 = const()[name = string("op_91_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8257664)))];
+            tensor<fp16, [768]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9437376)))];
+            tensor<fp16, [1, ?, 768]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_91_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [768, 768]> var_95_to_fp16 = const()[name = string("op_95_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9438976)))];
+            tensor<fp16, [768]> var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10618688)))];
+            tensor<fp16, [1, ?, 768]> linear_1_cast_fp16 = linear(bias = var_96_to_fp16, weight = var_95_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [3]> var_98_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_98_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_98_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_98_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [3]> var_98_shape_cast_fp16_to_int16 = cast(dtype = var_98_shape_cast_fp16_to_int16_dtype_0, x = var_98_shape_cast_fp16)[name = string("cast_79")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_98_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_78")];
+            tensor<int32, [1]> expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")];
+            tensor<int32, [4]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
+            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_28_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_28 = read_state(input = k_cache2)[name = string("coreml_update_state_28")];
+            tensor<int32, [3]> var_103_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_103_shape_cast_fp16")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_103_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_103_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_103_shape_cast_fp16_to_uint16 = cast(dtype = var_103_shape_cast_fp16_to_uint16_dtype_0, x = var_103_shape_cast_fp16)[name = string("cast_77")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_103_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_76")];
+            tensor<int32, [1]> expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")];
+            tensor<int32, [4]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)];
+            bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_29_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_29 = read_state(input = v_cache2)[name = string("coreml_update_state_29")];
+            tensor<fp16, [768, 768]> var_125_to_fp16 = const()[name = string("op_125_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10620288)))];
+            tensor<fp16, [1, ?, 768]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_125_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")];
+            tensor<fp16, [768, 768]> var_129_to_fp16 = const()[name = string("op_129_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11800000)))];
+            tensor<fp16, [768]> var_130_to_fp16 = const()[name = string("op_130_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12979712)))];
+            tensor<fp16, [1, ?, 768]> linear_3_cast_fp16 = linear(bias = var_130_to_fp16, weight = var_129_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")];
+            tensor<int32, [3]> var_132_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_132_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_132_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_132_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_132_shape_cast_fp16_to_uint16 = cast(dtype = var_132_shape_cast_fp16_to_uint16_dtype_0, x = var_132_shape_cast_fp16)[name = string("cast_75")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_132_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_74")];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")];
+            tensor<int32, [4]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)];
+            bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_28)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_30_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_30 = read_state(input = k_cache2)[name = string("coreml_update_state_30")];
+            tensor<int32, [3]> var_137_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_137_shape_cast_fp16")];
+            int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)];
+            int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)];
+            bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)];
+            string var_137_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_137_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_137_shape_cast_fp16_to_uint16 = cast(dtype = var_137_shape_cast_fp16_to_uint16_dtype_0, x = var_137_shape_cast_fp16)[name = string("cast_73")];
+            uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_137_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")];
+            string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_72")];
+            tensor<int32, [1]> expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")];
+            tensor<int32, [4]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_29)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_31_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_31 = read_state(input = v_cache2)[name = string("coreml_update_state_31")];
+            tensor<fp16, [768, 768]> var_159_to_fp16 = const()[name = string("op_159_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12981312)))];
+            tensor<fp16, [1, ?, 768]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_159_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")];
+            tensor<fp16, [768, 768]> var_163_to_fp16 = const()[name = string("op_163_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14161024)))];
+            tensor<fp16, [768]> var_164_to_fp16 = const()[name = string("op_164_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15340736)))];
+            tensor<fp16, [1, ?, 768]> linear_5_cast_fp16 = linear(bias = var_164_to_fp16, weight = var_163_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")];
+            tensor<int32, [3]> var_166_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_166_shape_cast_fp16")];
+            int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)];
+            int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)];
+            bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)];
+            string var_166_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_166_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_166_shape_cast_fp16_to_uint16 = cast(dtype = var_166_shape_cast_fp16_to_uint16_dtype_0, x = var_166_shape_cast_fp16)[name = string("cast_71")];
+            uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_166_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")];
+            string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_70")];
+            tensor<int32, [1]> expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")];
+            tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)];
+            bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_30)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_32_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_32 = read_state(input = k_cache2)[name = string("coreml_update_state_32")];
+            tensor<int32, [3]> var_171_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_171_shape_cast_fp16")];
+            int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)];
+            int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)];
+            bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)];
+            string var_171_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_171_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_171_shape_cast_fp16_to_uint16 = cast(dtype = var_171_shape_cast_fp16_to_uint16_dtype_0, x = var_171_shape_cast_fp16)[name = string("cast_69")];
+            uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_171_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")];
+            string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_68")];
+            tensor<int32, [1]> expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")];
+            tensor<int32, [4]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)];
+            bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_31)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_33_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_33 = read_state(input = v_cache2)[name = string("coreml_update_state_33")];
+            tensor<fp16, [768, 768]> var_193_to_fp16 = const()[name = string("op_193_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15342336)))];
+            tensor<fp16, [1, ?, 768]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_193_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [768, 768]> var_197_to_fp16 = const()[name = string("op_197_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16522048)))];
+            tensor<fp16, [768]> var_198_to_fp16 = const()[name = string("op_198_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17701760)))];
+            tensor<fp16, [1, ?, 768]> linear_7_cast_fp16 = linear(bias = var_198_to_fp16, weight = var_197_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")];
+            tensor<int32, [3]> var_200_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_200_shape_cast_fp16")];
+            int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)];
+            int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)];
+            bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)];
+            string var_200_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_200_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_200_shape_cast_fp16_to_uint16 = cast(dtype = var_200_shape_cast_fp16_to_uint16_dtype_0, x = var_200_shape_cast_fp16)[name = string("cast_67")];
+            uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_200_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")];
+            string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_66")];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")];
+            tensor<int32, [4]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)];
+            bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_32)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_34_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_34 = read_state(input = k_cache2)[name = string("coreml_update_state_34")];
+            tensor<int32, [3]> var_205_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_205_shape_cast_fp16")];
+            int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)];
+            int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)];
+            bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)];
+            string var_205_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_205_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_205_shape_cast_fp16_to_uint16 = cast(dtype = var_205_shape_cast_fp16_to_uint16_dtype_0, x = var_205_shape_cast_fp16)[name = string("cast_65")];
+            uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_205_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")];
+            string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_64")];
+            tensor<int32, [1]> expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")];
+            tensor<int32, [4]> concat_26 = const()[name = string("concat_26"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_33)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_35_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_35 = read_state(input = v_cache2)[name = string("coreml_update_state_35")];
+            tensor<fp16, [768, 768]> var_227_to_fp16 = const()[name = string("op_227_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17703360)))];
+            tensor<fp16, [1, ?, 768]> linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_227_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [768, 768]> var_231_to_fp16 = const()[name = string("op_231_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18883072)))];
+            tensor<fp16, [768]> var_232_to_fp16 = const()[name = string("op_232_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20062784)))];
+            tensor<fp16, [1, ?, 768]> linear_9_cast_fp16 = linear(bias = var_232_to_fp16, weight = var_231_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")];
+            tensor<int32, [3]> var_234_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_234_shape_cast_fp16")];
+            int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)];
+            int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)];
+            bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)];
+            string var_234_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_234_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_234_shape_cast_fp16_to_uint16 = cast(dtype = var_234_shape_cast_fp16_to_uint16_dtype_0, x = var_234_shape_cast_fp16)[name = string("cast_63")];
+            uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_234_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")];
+            string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_62")];
+            tensor<int32, [1]> expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")];
+            tensor<int32, [4]> concat_29 = const()[name = string("concat_29"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)];
+            bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_34)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_36_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_36 = read_state(input = k_cache2)[name = string("coreml_update_state_36")];
+            tensor<int32, [3]> var_239_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_239_shape_cast_fp16")];
+            int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)];
+            int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)];
+            bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)];
+            string var_239_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_239_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_239_shape_cast_fp16_to_uint16 = cast(dtype = var_239_shape_cast_fp16_to_uint16_dtype_0, x = var_239_shape_cast_fp16)[name = string("cast_61")];
+            uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_239_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")];
+            string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_60")];
+            tensor<int32, [1]> expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")];
+            tensor<int32, [4]> concat_32 = const()[name = string("concat_32"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [1]> concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)];
+            bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_35)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_37_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_37 = read_state(input = v_cache2)[name = string("coreml_update_state_37")];
+            tensor<fp16, [768, 768]> var_261_to_fp16 = const()[name = string("op_261_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20064384)))];
+            tensor<fp16, [1, ?, 768]> linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_261_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")];
+            tensor<fp16, [768, 768]> var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21244096)))];
+            tensor<fp16, [768]> var_266_to_fp16 = const()[name = string("op_266_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22423808)))];
+            tensor<fp16, [1, ?, 768]> linear_11_cast_fp16 = linear(bias = var_266_to_fp16, weight = var_265_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")];
+            tensor<int32, [3]> var_268_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_268_shape_cast_fp16")];
+            int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)];
+            int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)];
+            bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)];
+            string var_268_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_268_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_268_shape_cast_fp16_to_uint16 = cast(dtype = var_268_shape_cast_fp16_to_uint16_dtype_0, x = var_268_shape_cast_fp16)[name = string("cast_59")];
+            uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_268_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")];
+            string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_58")];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")];
+            tensor<int32, [4]> concat_35 = const()[name = string("concat_35"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)];
+            bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_36)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_38_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_38 = read_state(input = k_cache2)[name = string("coreml_update_state_38")];
+            tensor<int32, [3]> var_273_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_273_shape_cast_fp16")];
+            int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)];
+            int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)];
+            bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)];
+            string var_273_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_273_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_273_shape_cast_fp16_to_uint16 = cast(dtype = var_273_shape_cast_fp16_to_uint16_dtype_0, x = var_273_shape_cast_fp16)[name = string("cast_57")];
+            uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_273_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")];
+            string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_56")];
+            tensor<int32, [1]> expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")];
+            tensor<int32, [4]> concat_38 = const()[name = string("concat_38"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [1]> concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
+            bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_37)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_39_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_39 = read_state(input = v_cache2)[name = string("coreml_update_state_39")];
+            tensor<fp16, [768, 768]> var_295_to_fp16 = const()[name = string("op_295_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22425408)))];
+            tensor<fp16, [1, ?, 768]> linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_295_to_fp16, x = audio_data)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [768, 768]> var_299_to_fp16 = const()[name = string("op_299_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23605120)))];
+            tensor<fp16, [768]> var_300_to_fp16 = const()[name = string("op_300_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24784832)))];
+            tensor<fp16, [1, ?, 768]> linear_13_cast_fp16 = linear(bias = var_300_to_fp16, weight = var_299_to_fp16, x = audio_data)[name = string("linear_13_cast_fp16")];
+            tensor<int32, [3]> var_302_shape_cast_fp16 = shape(x = linear_12_cast_fp16)[name = string("op_302_shape_cast_fp16")];
+            int32 gather_12_axis_0 = const()[name = string("gather_12_axis_0"), val = int32(0)];
+            int32 gather_12_batch_dims_0 = const()[name = string("gather_12_batch_dims_0"), val = int32(0)];
+            bool gather_12_validate_indices_0 = const()[name = string("gather_12_validate_indices_0"), val = bool(false)];
+            string var_302_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_302_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_12_to_uint16 = const()[name = string("select_12_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_302_shape_cast_fp16_to_uint16 = cast(dtype = var_302_shape_cast_fp16_to_uint16_dtype_0, x = var_302_shape_cast_fp16)[name = string("cast_55")];
+            uint16 gather_12_cast_uint16 = gather(axis = gather_12_axis_0, batch_dims = gather_12_batch_dims_0, indices = select_12_to_uint16, validate_indices = gather_12_validate_indices_0, x = var_302_shape_cast_fp16_to_uint16)[name = string("gather_12_cast_uint16")];
+            string gather_12_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_12_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_59_axes_0 = const()[name = string("expand_dims_59_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_12_cast_uint16_to_int32 = cast(dtype = gather_12_cast_uint16_to_int32_dtype_0, x = gather_12_cast_uint16)[name = string("cast_54")];
+            tensor<int32, [1]> expand_dims_59 = expand_dims(axes = expand_dims_59_axes_0, x = gather_12_cast_uint16_to_int32)[name = string("expand_dims_59")];
+            tensor<int32, [4]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [1]> concat_42_values0_0 = const()[name = string("concat_42_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_42_values1_0 = const()[name = string("concat_42_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_42_values3_0 = const()[name = string("concat_42_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)];
+            bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (concat_42_values0_0, concat_42_values1_0, expand_dims_59, concat_42_values3_0))[name = string("concat_42")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_41, begin_mask = k_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_42, end_mask = k_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_7_stride_0, update = linear_12_cast_fp16, x = coreml_update_state_38)[name = string("k_cache2_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_7_cast_fp16, input = k_cache2)[name = string("coreml_update_state_40_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_40 = read_state(input = k_cache2)[name = string("coreml_update_state_40")];
+            tensor<int32, [3]> var_307_shape_cast_fp16 = shape(x = linear_13_cast_fp16)[name = string("op_307_shape_cast_fp16")];
+            int32 gather_13_axis_0 = const()[name = string("gather_13_axis_0"), val = int32(0)];
+            int32 gather_13_batch_dims_0 = const()[name = string("gather_13_batch_dims_0"), val = int32(0)];
+            bool gather_13_validate_indices_0 = const()[name = string("gather_13_validate_indices_0"), val = bool(false)];
+            string var_307_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_307_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_13_to_uint16 = const()[name = string("select_13_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_307_shape_cast_fp16_to_uint16 = cast(dtype = var_307_shape_cast_fp16_to_uint16_dtype_0, x = var_307_shape_cast_fp16)[name = string("cast_53")];
+            uint16 gather_13_cast_uint16 = gather(axis = gather_13_axis_0, batch_dims = gather_13_batch_dims_0, indices = select_13_to_uint16, validate_indices = gather_13_validate_indices_0, x = var_307_shape_cast_fp16_to_uint16)[name = string("gather_13_cast_uint16")];
+            string gather_13_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_13_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_63_axes_0 = const()[name = string("expand_dims_63_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_13_cast_uint16_to_int32 = cast(dtype = gather_13_cast_uint16_to_int32_dtype_0, x = gather_13_cast_uint16)[name = string("cast_52")];
+            tensor<int32, [1]> expand_dims_63 = expand_dims(axes = expand_dims_63_axes_0, x = gather_13_cast_uint16_to_int32)[name = string("expand_dims_63")];
+            tensor<int32, [4]> concat_44 = const()[name = string("concat_44"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [1]> concat_45_values0_0 = const()[name = string("concat_45_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_45_values1_0 = const()[name = string("concat_45_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_45_values3_0 = const()[name = string("concat_45_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_45_axis_0 = const()[name = string("concat_45_axis_0"), val = int32(0)];
+            bool concat_45_interleave_0 = const()[name = string("concat_45_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_45 = concat(axis = concat_45_axis_0, interleave = concat_45_interleave_0, values = (concat_45_values0_0, concat_45_values1_0, expand_dims_63, concat_45_values3_0))[name = string("concat_45")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_44, begin_mask = v_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_45, end_mask = v_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_7_stride_0, update = linear_13_cast_fp16, x = coreml_update_state_39)[name = string("v_cache2_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_7_cast_fp16, input = v_cache2)[name = string("coreml_update_state_41_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_41 = read_state(input = v_cache2)[name = string("coreml_update_state_41")];
+            tensor<fp16, [768, 768]> var_329_to_fp16 = const()[name = string("op_329_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24786432)))];
+            tensor<fp16, [1, ?, 768]> linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_329_to_fp16, x = audio_data)[name = string("linear_14_cast_fp16")];
+            tensor<fp16, [768, 768]> var_333_to_fp16 = const()[name = string("op_333_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25966144)))];
+            tensor<fp16, [768]> var_334_to_fp16 = const()[name = string("op_334_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27145856)))];
+            tensor<fp16, [1, ?, 768]> linear_15_cast_fp16 = linear(bias = var_334_to_fp16, weight = var_333_to_fp16, x = audio_data)[name = string("linear_15_cast_fp16")];
+            tensor<int32, [3]> var_336_shape_cast_fp16 = shape(x = linear_14_cast_fp16)[name = string("op_336_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_336_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_336_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_336_shape_cast_fp16_to_uint16 = cast(dtype = var_336_shape_cast_fp16_to_uint16_dtype_0, x = var_336_shape_cast_fp16)[name = string("cast_51")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_336_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_50")];
+            tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = gather_14_cast_uint16_to_int32)[name = string("expand_dims_67")];
+            tensor<int32, [4]> concat_47 = const()[name = string("concat_47"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_48_values1_0 = const()[name = string("concat_48_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_48_values3_0 = const()[name = string("concat_48_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, concat_48_values1_0, expand_dims_67, concat_48_values3_0))[name = string("concat_48")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_47, begin_mask = k_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_48, end_mask = k_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_8_stride_0, update = linear_14_cast_fp16, x = coreml_update_state_40)[name = string("k_cache2_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_8_cast_fp16, input = k_cache2)[name = string("coreml_update_state_42_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_42 = read_state(input = k_cache2)[name = string("coreml_update_state_42")];
+            tensor<int32, [3]> var_341_shape_cast_fp16 = shape(x = linear_15_cast_fp16)[name = string("op_341_shape_cast_fp16")];
+            int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)];
+            int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)];
+            bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)];
+            string var_341_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_341_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_341_shape_cast_fp16_to_uint16 = cast(dtype = var_341_shape_cast_fp16_to_uint16_dtype_0, x = var_341_shape_cast_fp16)[name = string("cast_49")];
+            uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_341_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")];
+            string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_71_axes_0 = const()[name = string("expand_dims_71_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_48")];
+            tensor<int32, [1]> expand_dims_71 = expand_dims(axes = expand_dims_71_axes_0, x = gather_15_cast_uint16_to_int32)[name = string("expand_dims_71")];
+            tensor<int32, [4]> concat_50 = const()[name = string("concat_50"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [1]> concat_51_values0_0 = const()[name = string("concat_51_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)];
+            bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (concat_51_values0_0, concat_51_values1_0, expand_dims_71, concat_51_values3_0))[name = string("concat_51")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_50, begin_mask = v_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_51, end_mask = v_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_8_stride_0, update = linear_15_cast_fp16, x = coreml_update_state_41)[name = string("v_cache2_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_8_cast_fp16, input = v_cache2)[name = string("coreml_update_state_43_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_43 = read_state(input = v_cache2)[name = string("coreml_update_state_43")];
+            tensor<fp16, [768, 768]> var_363_to_fp16 = const()[name = string("op_363_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27147456)))];
+            tensor<fp16, [1, ?, 768]> linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_363_to_fp16, x = audio_data)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [768, 768]> var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28327168)))];
+            tensor<fp16, [768]> var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29506880)))];
+            tensor<fp16, [1, ?, 768]> linear_17_cast_fp16 = linear(bias = var_368_to_fp16, weight = var_367_to_fp16, x = audio_data)[name = string("linear_17_cast_fp16")];
+            tensor<int32, [3]> var_370_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_370_shape_cast_fp16")];
+            int32 gather_16_axis_0 = const()[name = string("gather_16_axis_0"), val = int32(0)];
+            int32 gather_16_batch_dims_0 = const()[name = string("gather_16_batch_dims_0"), val = int32(0)];
+            bool gather_16_validate_indices_0 = const()[name = string("gather_16_validate_indices_0"), val = bool(false)];
+            string var_370_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_370_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_16_to_uint16 = const()[name = string("select_16_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_370_shape_cast_fp16_to_uint16 = cast(dtype = var_370_shape_cast_fp16_to_uint16_dtype_0, x = var_370_shape_cast_fp16)[name = string("cast_47")];
+            uint16 gather_16_cast_uint16 = gather(axis = gather_16_axis_0, batch_dims = gather_16_batch_dims_0, indices = select_16_to_uint16, validate_indices = gather_16_validate_indices_0, x = var_370_shape_cast_fp16_to_uint16)[name = string("gather_16_cast_uint16")];
+            string gather_16_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_16_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_75_axes_0 = const()[name = string("expand_dims_75_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_16_cast_uint16_to_int32 = cast(dtype = gather_16_cast_uint16_to_int32_dtype_0, x = gather_16_cast_uint16)[name = string("cast_46")];
+            tensor<int32, [1]> expand_dims_75 = expand_dims(axes = expand_dims_75_axes_0, x = gather_16_cast_uint16_to_int32)[name = string("expand_dims_75")];
+            tensor<int32, [4]> concat_53 = const()[name = string("concat_53"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [1]> concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_54_values1_0 = const()[name = string("concat_54_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_54_values3_0 = const()[name = string("concat_54_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, concat_54_values1_0, expand_dims_75, concat_54_values3_0))[name = string("concat_54")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_53, begin_mask = k_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_54, end_mask = k_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_9_stride_0, update = linear_16_cast_fp16, x = coreml_update_state_42)[name = string("k_cache2_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_9_cast_fp16, input = k_cache2)[name = string("coreml_update_state_44_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_44 = read_state(input = k_cache2)[name = string("coreml_update_state_44")];
+            tensor<int32, [3]> var_375_shape_cast_fp16 = shape(x = linear_17_cast_fp16)[name = string("op_375_shape_cast_fp16")];
+            int32 gather_17_axis_0 = const()[name = string("gather_17_axis_0"), val = int32(0)];
+            int32 gather_17_batch_dims_0 = const()[name = string("gather_17_batch_dims_0"), val = int32(0)];
+            bool gather_17_validate_indices_0 = const()[name = string("gather_17_validate_indices_0"), val = bool(false)];
+            string var_375_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_375_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_17_to_uint16 = const()[name = string("select_17_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_375_shape_cast_fp16_to_uint16 = cast(dtype = var_375_shape_cast_fp16_to_uint16_dtype_0, x = var_375_shape_cast_fp16)[name = string("cast_45")];
+            uint16 gather_17_cast_uint16 = gather(axis = gather_17_axis_0, batch_dims = gather_17_batch_dims_0, indices = select_17_to_uint16, validate_indices = gather_17_validate_indices_0, x = var_375_shape_cast_fp16_to_uint16)[name = string("gather_17_cast_uint16")];
+            string gather_17_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_17_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_79_axes_0 = const()[name = string("expand_dims_79_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_17_cast_uint16_to_int32 = cast(dtype = gather_17_cast_uint16_to_int32_dtype_0, x = gather_17_cast_uint16)[name = string("cast_44")];
+            tensor<int32, [1]> expand_dims_79 = expand_dims(axes = expand_dims_79_axes_0, x = gather_17_cast_uint16_to_int32)[name = string("expand_dims_79")];
+            tensor<int32, [4]> concat_56 = const()[name = string("concat_56"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [1]> concat_57_values0_0 = const()[name = string("concat_57_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)];
+            bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (concat_57_values0_0, concat_57_values1_0, expand_dims_79, concat_57_values3_0))[name = string("concat_57")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = v_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = v_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_9_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_43)[name = string("v_cache2_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_9_cast_fp16, input = v_cache2)[name = string("coreml_update_state_45_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_45 = read_state(input = v_cache2)[name = string("coreml_update_state_45")];
+            tensor<fp16, [768, 768]> var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29508480)))];
+            tensor<fp16, [1, ?, 768]> linear_18_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_397_to_fp16, x = audio_data)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [768, 768]> var_401_to_fp16 = const()[name = string("op_401_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30688192)))];
+            tensor<fp16, [768]> var_402_to_fp16 = const()[name = string("op_402_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31867904)))];
+            tensor<fp16, [1, ?, 768]> linear_19_cast_fp16 = linear(bias = var_402_to_fp16, weight = var_401_to_fp16, x = audio_data)[name = string("linear_19_cast_fp16")];
+            tensor<int32, [3]> var_404_shape_cast_fp16 = shape(x = linear_18_cast_fp16)[name = string("op_404_shape_cast_fp16")];
+            int32 gather_18_axis_0 = const()[name = string("gather_18_axis_0"), val = int32(0)];
+            int32 gather_18_batch_dims_0 = const()[name = string("gather_18_batch_dims_0"), val = int32(0)];
+            bool gather_18_validate_indices_0 = const()[name = string("gather_18_validate_indices_0"), val = bool(false)];
+            string var_404_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_404_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_18_to_uint16 = const()[name = string("select_18_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_404_shape_cast_fp16_to_uint16 = cast(dtype = var_404_shape_cast_fp16_to_uint16_dtype_0, x = var_404_shape_cast_fp16)[name = string("cast_43")];
+            uint16 gather_18_cast_uint16 = gather(axis = gather_18_axis_0, batch_dims = gather_18_batch_dims_0, indices = select_18_to_uint16, validate_indices = gather_18_validate_indices_0, x = var_404_shape_cast_fp16_to_uint16)[name = string("gather_18_cast_uint16")];
+            string gather_18_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_18_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_18_cast_uint16_to_int32 = cast(dtype = gather_18_cast_uint16_to_int32_dtype_0, x = gather_18_cast_uint16)[name = string("cast_42")];
+            tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = gather_18_cast_uint16_to_int32)[name = string("expand_dims_83")];
+            tensor<int32, [4]> concat_59 = const()[name = string("concat_59"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [1]> concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, concat_60_values1_0, expand_dims_83, concat_60_values3_0))[name = string("concat_60")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_59, begin_mask = k_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_60, end_mask = k_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_10_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_44)[name = string("k_cache2_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_10_cast_fp16, input = k_cache2)[name = string("coreml_update_state_46_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_46 = read_state(input = k_cache2)[name = string("coreml_update_state_46")];
+            tensor<int32, [3]> var_409_shape_cast_fp16 = shape(x = linear_19_cast_fp16)[name = string("op_409_shape_cast_fp16")];
+            int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)];
+            int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)];
+            bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)];
+            string var_409_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_409_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_409_shape_cast_fp16_to_uint16 = cast(dtype = var_409_shape_cast_fp16_to_uint16_dtype_0, x = var_409_shape_cast_fp16)[name = string("cast_41")];
+            uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_409_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")];
+            string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_87_axes_0 = const()[name = string("expand_dims_87_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_40")];
+            tensor<int32, [1]> expand_dims_87 = expand_dims(axes = expand_dims_87_axes_0, x = gather_19_cast_uint16_to_int32)[name = string("expand_dims_87")];
+            tensor<int32, [4]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [1]> concat_63_values0_0 = const()[name = string("concat_63_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)];
+            bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (concat_63_values0_0, concat_63_values1_0, expand_dims_87, concat_63_values3_0))[name = string("concat_63")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_62, begin_mask = v_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_63, end_mask = v_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_10_stride_0, update = linear_19_cast_fp16, x = coreml_update_state_45)[name = string("v_cache2_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_10_cast_fp16, input = v_cache2)[name = string("coreml_update_state_47_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_47 = read_state(input = v_cache2)[name = string("coreml_update_state_47")];
+            tensor<fp16, [768, 768]> var_431_to_fp16 = const()[name = string("op_431_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31869504)))];
+            tensor<fp16, [1, ?, 768]> linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_431_to_fp16, x = audio_data)[name = string("linear_20_cast_fp16")];
+            tensor<fp16, [768, 768]> var_435_to_fp16 = const()[name = string("op_435_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33049216)))];
+            tensor<fp16, [768]> var_436_to_fp16 = const()[name = string("op_436_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34228928)))];
+            tensor<fp16, [1, ?, 768]> linear_21_cast_fp16 = linear(bias = var_436_to_fp16, weight = var_435_to_fp16, x = audio_data)[name = string("linear_21_cast_fp16")];
+            tensor<int32, [3]> var_438_shape_cast_fp16 = shape(x = linear_20_cast_fp16)[name = string("op_438_shape_cast_fp16")];
+            int32 gather_20_axis_0 = const()[name = string("gather_20_axis_0"), val = int32(0)];
+            int32 gather_20_batch_dims_0 = const()[name = string("gather_20_batch_dims_0"), val = int32(0)];
+            bool gather_20_validate_indices_0 = const()[name = string("gather_20_validate_indices_0"), val = bool(false)];
+            string var_438_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_438_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_20_to_uint16 = const()[name = string("select_20_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_438_shape_cast_fp16_to_uint16 = cast(dtype = var_438_shape_cast_fp16_to_uint16_dtype_0, x = var_438_shape_cast_fp16)[name = string("cast_39")];
+            uint16 gather_20_cast_uint16 = gather(axis = gather_20_axis_0, batch_dims = gather_20_batch_dims_0, indices = select_20_to_uint16, validate_indices = gather_20_validate_indices_0, x = var_438_shape_cast_fp16_to_uint16)[name = string("gather_20_cast_uint16")];
+            string gather_20_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_20_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_91_axes_0 = const()[name = string("expand_dims_91_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_20_cast_uint16_to_int32 = cast(dtype = gather_20_cast_uint16_to_int32_dtype_0, x = gather_20_cast_uint16)[name = string("cast_38")];
+            tensor<int32, [1]> expand_dims_91 = expand_dims(axes = expand_dims_91_axes_0, x = gather_20_cast_uint16_to_int32)[name = string("expand_dims_91")];
+            tensor<int32, [4]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [1]> concat_66_values0_0 = const()[name = string("concat_66_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_66_values1_0 = const()[name = string("concat_66_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_66_values3_0 = const()[name = string("concat_66_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)];
+            bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (concat_66_values0_0, concat_66_values1_0, expand_dims_91, concat_66_values3_0))[name = string("concat_66")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_65, begin_mask = k_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_66, end_mask = k_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_11_stride_0, update = linear_20_cast_fp16, x = coreml_update_state_46)[name = string("k_cache2_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_11_cast_fp16, input = k_cache2)[name = string("coreml_update_state_48_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_48 = read_state(input = k_cache2)[name = string("coreml_update_state_48")];
+            tensor<int32, [3]> var_443_shape_cast_fp16 = shape(x = linear_21_cast_fp16)[name = string("op_443_shape_cast_fp16")];
+            int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)];
+            int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)];
+            bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)];
+            string var_443_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_443_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_443_shape_cast_fp16_to_uint16 = cast(dtype = var_443_shape_cast_fp16_to_uint16_dtype_0, x = var_443_shape_cast_fp16)[name = string("cast_37")];
+            uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_443_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")];
+            string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_95_axes_0 = const()[name = string("expand_dims_95_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_36")];
+            tensor<int32, [1]> expand_dims_95 = expand_dims(axes = expand_dims_95_axes_0, x = gather_21_cast_uint16_to_int32)[name = string("expand_dims_95")];
+            tensor<int32, [4]> concat_68 = const()[name = string("concat_68"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [1]> concat_69_values0_0 = const()[name = string("concat_69_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_69_values1_0 = const()[name = string("concat_69_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_69_values3_0 = const()[name = string("concat_69_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_69_axis_0 = const()[name = string("concat_69_axis_0"), val = int32(0)];
+            bool concat_69_interleave_0 = const()[name = string("concat_69_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_69 = concat(axis = concat_69_axis_0, interleave = concat_69_interleave_0, values = (concat_69_values0_0, concat_69_values1_0, expand_dims_95, concat_69_values3_0))[name = string("concat_69")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_68, begin_mask = v_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_69, end_mask = v_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_11_stride_0, update = linear_21_cast_fp16, x = coreml_update_state_47)[name = string("v_cache2_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_11_cast_fp16, input = v_cache2)[name = string("coreml_update_state_49_write_state")];
+            tensor<fp16, [12, 1, 1500, 768]> coreml_update_state_49 = read_state(input = v_cache2)[name = string("coreml_update_state_49")];
+            tensor<fp16, [768, 768]> var_465_to_fp16 = const()[name = string("op_465_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34230528)))];
+            tensor<fp16, [1, ?, 768]> linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_465_to_fp16, x = audio_data)[name = string("linear_22_cast_fp16")];
+            tensor<fp16, [768, 768]> var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35410240)))];
+            tensor<fp16, [768]> var_470_to_fp16 = const()[name = string("op_470_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36589952)))];
+            tensor<fp16, [1, ?, 768]> linear_23_cast_fp16 = linear(bias = var_470_to_fp16, weight = var_469_to_fp16, x = audio_data)[name = string("linear_23_cast_fp16")];
+            tensor<int32, [3]> var_472_shape_cast_fp16 = shape(x = linear_22_cast_fp16)[name = string("op_472_shape_cast_fp16")];
+            int32 gather_22_axis_0 = const()[name = string("gather_22_axis_0"), val = int32(0)];
+            int32 gather_22_batch_dims_0 = const()[name = string("gather_22_batch_dims_0"), val = int32(0)];
+            bool gather_22_validate_indices_0 = const()[name = string("gather_22_validate_indices_0"), val = bool(false)];
+            string var_472_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_472_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_22_to_uint16 = const()[name = string("select_22_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_472_shape_cast_fp16_to_uint16 = cast(dtype = var_472_shape_cast_fp16_to_uint16_dtype_0, x = var_472_shape_cast_fp16)[name = string("cast_35")];
+            uint16 gather_22_cast_uint16 = gather(axis = gather_22_axis_0, batch_dims = gather_22_batch_dims_0, indices = select_22_to_uint16, validate_indices = gather_22_validate_indices_0, x = var_472_shape_cast_fp16_to_uint16)[name = string("gather_22_cast_uint16")];
+            string gather_22_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_22_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_22_cast_uint16_to_int32 = cast(dtype = gather_22_cast_uint16_to_int32_dtype_0, x = gather_22_cast_uint16)[name = string("cast_34")];
+            tensor<int32, [1]> expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = gather_22_cast_uint16_to_int32)[name = string("expand_dims_99")];
+            tensor<int32, [4]> concat_71 = const()[name = string("concat_71"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [1]> concat_72_values0_0 = const()[name = string("concat_72_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_72_values1_0 = const()[name = string("concat_72_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_72_values3_0 = const()[name = string("concat_72_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)];
+            bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (concat_72_values0_0, concat_72_values1_0, expand_dims_99, concat_72_values3_0))[name = string("concat_72")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> k_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_71, begin_mask = k_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_72, end_mask = k_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_12_stride_0, update = linear_22_cast_fp16, x = coreml_update_state_48)[name = string("k_cache2_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_12_cast_fp16, input = k_cache2)[name = string("coreml_update_state_50_write_state")];
+            tensor<int32, [3]> var_477_shape_cast_fp16 = shape(x = linear_23_cast_fp16)[name = string("op_477_shape_cast_fp16")];
+            int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)];
+            int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)];
+            bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)];
+            string var_477_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_477_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_477_shape_cast_fp16_to_uint16 = cast(dtype = var_477_shape_cast_fp16_to_uint16_dtype_0, x = var_477_shape_cast_fp16)[name = string("cast_33")];
+            uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_477_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")];
+            string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_103_axes_0 = const()[name = string("expand_dims_103_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_32")];
+            tensor<int32, [1]> expand_dims_103 = expand_dims(axes = expand_dims_103_axes_0, x = gather_23_cast_uint16_to_int32)[name = string("expand_dims_103")];
+            tensor<int32, [4]> concat_74 = const()[name = string("concat_74"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [1]> concat_75_values0_0 = const()[name = string("concat_75_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)];
+            bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (concat_75_values0_0, concat_75_values1_0, expand_dims_103, concat_75_values3_0))[name = string("concat_75")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 1500, 768]> v_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_74, begin_mask = v_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_75, end_mask = v_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_12_stride_0, update = linear_23_cast_fp16, x = coreml_update_state_49)[name = string("v_cache2_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_12_cast_fp16, input = v_cache2)[name = string("coreml_update_state_51_write_state")];
+        } -> (dummy);
+}
\ No newline at end of file
diff --git a/small/decoder_first.mlmodelc/weights/weight.bin b/small/decoder_first.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..10b4ed02bb0d11bb4330f010cb389bbb65df12f5
--- /dev/null
+++ b/small/decoder_first.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3059d6670bb065c1eefeff01f7c5496af03cd5f48621357792473c5b63044b3e
+size 36591552
diff --git a/small/decoder_second.mlmodelc/analytics/coremldata.bin b/small/decoder_second.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..91ade0def0e2444c4273db0aaf39fad3ca7f7067
--- /dev/null
+++ b/small/decoder_second.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b477d0308b85c7fa966f41c8bb93a37823206812e444a48a26fb153629700dd9
+size 243
diff --git a/small/decoder_second.mlmodelc/coremldata.bin b/small/decoder_second.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2f2ca8662614ee4c829f6f3eb183306165e77122
--- /dev/null
+++ b/small/decoder_second.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e102e9f0547bcdce522991060c135b72b7dfeca80443bb17033816100bc5841
+size 487
diff --git a/small/decoder_second.mlmodelc/metadata.json b/small/decoder_second.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..90f741660187450796a7f8b2a43fa2fa75181984
--- /dev/null
+++ b/small/decoder_second.mlmodelc/metadata.json
@@ -0,0 +1,127 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.linear" : 97,
+      "Ios18.readState" : 26,
+      "Ios18.expandDims" : 13,
+      "Ios18.sub" : 1,
+      "Ios18.matmul" : 48,
+      "Ios18.gelu" : 12,
+      "Ios18.gather" : 15,
+      "Ios18.concat" : 62,
+      "Shape" : 14,
+      "Ios18.add" : 61,
+      "Ios18.sliceUpdate" : 48,
+      "Ios18.sliceByIndex" : 97,
+      "Ios18.layerNorm" : 37,
+      "Ios18.cast" : 28,
+      "Ios18.transpose" : 96,
+      "Ios18.writeState" : 24,
+      "Ios18.reshape" : 96,
+      "Ios18.softmax" : 24,
+      "Ios18.mul" : 48
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 1 × 448 × 768)",
+        "shortDescription" : "",
+        "shape" : "[12, 1, 448, 768]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 1 × 448 × 768)",
+        "shortDescription" : "",
+        "shape" : "[12, 1, 448, 768]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 1 × 1500 × 768)",
+        "shortDescription" : "",
+        "shape" : "[12, 1, 1500, 768]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 1 × 1500 × 768)",
+        "shortDescription" : "",
+        "shape" : "[12, 1, 1500, 768]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Int32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Int32 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "token_data",
+        "shortDescription" : ""
+      },
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Float16 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "offset_mask",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_second",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/small/decoder_second.mlmodelc/model.mil b/small/decoder_second.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..0cda46e1dd0c22cd1586d0e31cc8cc229178acfc
--- /dev/null
+++ b/small/decoder_second.mlmodelc/model.mil
@@ -0,0 +1,2398 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(state<tensor<fp16, [12, 1, 448, 768]>> k_cache1, state<tensor<fp16, [12, 1, 1500, 768]>> k_cache2, tensor<fp16, [1, ?]> offset_mask, tensor<int32, [1, ?]> token_data, state<tensor<fp16, [12, 1, 448, 768]>> v_cache1, state<tensor<fp16, [12, 1, 1500, 768]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] {
+            tensor<int32, [2]> var_38_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_38_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_38_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_38_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [2]> var_38_shape_cast_fp16_to_int16 = cast(dtype = var_38_shape_cast_fp16_to_int16_dtype_0, x = var_38_shape_cast_fp16)[name = string("cast_154")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_38_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [2]> var_42_shape = shape(x = token_data)[name = string("op_42_shape")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_42_shape_to_uint16_dtype_0 = const()[name = string("op_42_shape_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [2]> var_42_shape_to_uint16 = cast(dtype = var_42_shape_to_uint16_dtype_0, x = var_42_shape)[name = string("cast_152")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_42_shape_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_151")];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_153")];
+            int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")];
+            int32 var_74_axis_0 = const()[name = string("op_74_axis_0"), val = int32(0)];
+            int32 var_74_batch_dims_0 = const()[name = string("op_74_batch_dims_0"), val = int32(0)];
+            bool var_74_validate_indices_0 = const()[name = string("op_74_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [51865, 768]> token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor<fp16, [51865, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, ?, 768]> var_74_cast_fp16 = gather(axis = var_74_axis_0, batch_dims = var_74_batch_dims_0, indices = token_data, validate_indices = var_74_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_74_cast_fp16")];
+            int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)];
+            int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)];
+            bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")];
+            int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(768)];
+            int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)];
+            bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")];
+            tensor<bool, [2]> var_77_end_mask_0 = const()[name = string("op_77_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 768]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [448, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79664768)))];
+            tensor<fp16, [?, ?]> var_77_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_77_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_77_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_3_cast_fp16 = add(x = var_74_cast_fp16, y = var_77_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<fp16, [12, 1, 448, 768]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")];
+            tensor<fp16, [12, 1, 448, 768]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")];
+            tensor<fp16, [12, 1, 1500, 768]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<int32, [4]> k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")];
+            tensor<fp16, [12, 1, 1500, 768]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<int32, [4]> v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")];
+            int32 var_100 = const()[name = string("op_100"), val = int32(-1)];
+            tensor<int32, [1]> var_118_axes_0 = const()[name = string("op_118_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80352960)))];
+            tensor<fp16, [768]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80354560)))];
+            fp16 var_106_to_fp16 = const()[name = string("op_106_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_118_cast_fp16 = layer_norm(axes = var_118_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_106_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_118_cast_fp16")];
+            tensor<fp16, [768, 768]> var_129_to_fp16 = const()[name = string("op_129_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80356160)))];
+            tensor<fp16, [768]> var_130_to_fp16 = const()[name = string("op_130_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81535872)))];
+            tensor<fp16, [1, ?, 768]> linear_0_cast_fp16 = linear(bias = var_130_to_fp16, weight = var_129_to_fp16, x = var_118_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [768, 768]> var_133_to_fp16 = const()[name = string("op_133_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81537472)))];
+            tensor<fp16, [768]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82717184)))];
+            tensor<fp16, [1, ?, 768]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_133_to_fp16, x = var_118_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [768, 768]> var_137_to_fp16 = const()[name = string("op_137_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82718784)))];
+            tensor<fp16, [768]> var_138_to_fp16 = const()[name = string("op_138_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83898496)))];
+            tensor<fp16, [1, ?, 768]> linear_2_cast_fp16 = linear(bias = var_138_to_fp16, weight = var_137_to_fp16, x = var_118_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [3]> var_140_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_140_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_140_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_140_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_140_shape_cast_fp16_to_uint16 = cast(dtype = var_140_shape_cast_fp16_to_uint16_dtype_0, x = var_140_shape_cast_fp16)[name = string("cast_150")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_140_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_149")];
+            int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")];
+            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")];
+            tensor<int32, [1]> expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")];
+            tensor<int32, [1]> concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)];
+            bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")];
+            tensor<int32, [1]> concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)];
+            bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_24_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_24 = read_state(input = k_cache1)[name = string("coreml_update_state_24")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_25_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_25 = read_state(input = v_cache1)[name = string("coreml_update_state_25")];
+            int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)];
+            int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(768)];
+            int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
+            bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")];
+            tensor<int32, [3]> var_156_begin_0 = const()[name = string("op_156_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_156_end_mask_0 = const()[name = string("op_156_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_156_cast_fp16 = slice_by_index(begin = var_156_begin_0, end = concat_10, end_mask = var_156_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_156_cast_fp16")];
+            tensor<int32, [3]> var_159_begin_0 = const()[name = string("op_159_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_159_end_mask_0 = const()[name = string("op_159_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_159_cast_fp16 = slice_by_index(begin = var_159_begin_0, end = concat_10, end_mask = var_159_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_159_cast_fp16")];
+            tensor<int32, [4]> concat_12x = const()[name = string("concat_12x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_169_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_169_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_60_to_fp16 = const()[name = string("const_60_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_3_cast_fp16 = mul(x = var_169_cast_fp16, y = const_60_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> concat_13x = const()[name = string("concat_13x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_176_cast_fp16 = reshape(shape = concat_13x, x = var_156_cast_fp16)[name = string("op_176_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_61_to_fp16 = const()[name = string("const_61_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_5_cast_fp16 = mul(x = var_176_cast_fp16, y = const_61_to_fp16)[name = string("k_5_cast_fp16")];
+            tensor<int32, [4]> concat_14x = const()[name = string("concat_14x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_183_cast_fp16 = reshape(shape = concat_14x, x = var_159_cast_fp16)[name = string("op_183_cast_fp16")];
+            tensor<int32, [4]> var_184 = const()[name = string("op_184"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_98 = transpose(perm = transpose_98_perm_0, x = k_5_cast_fp16)[name = string("transpose_238")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_97 = transpose(perm = transpose_97_perm_0, x = q_3_cast_fp16)[name = string("transpose_239")];
+            tensor<fp16, [1, 12, ?, ?]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_97, y = transpose_98)[name = string("qk_1_cast_fp16")];
+            int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")];
+            tensor<int32, [2]> var_187_begin_0 = const()[name = string("op_187_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_187_end_mask_0 = const()[name = string("op_187_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 448]> mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor<fp16, [448, 448]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83900096)))];
+            tensor<fp16, [?, 448]> var_187_cast_fp16 = slice_by_index(begin = var_187_begin_0, end = concat_15, end_mask = var_187_end_mask_0, x = mask_to_fp16)[name = string("op_187_cast_fp16")];
+            int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)];
+            int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)];
+            bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")];
+            tensor<int32, [2]> var_188_begin_0 = const()[name = string("op_188_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_188_end_mask_0 = const()[name = string("op_188_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_188_cast_fp16 = slice_by_index(begin = var_188_begin_0, end = concat_16, end_mask = var_188_end_mask_0, x = var_187_cast_fp16)[name = string("op_188_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_188_cast_fp16)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_191_cast_fp16 = softmax(axis = var_100, x = qk_3_cast_fp16)[name = string("op_191_cast_fp16")];
+            bool var_193_transpose_x_0 = const()[name = string("op_193_transpose_x_0"), val = bool(false)];
+            bool var_193_transpose_y_0 = const()[name = string("op_193_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_5_cast_fp16 = transpose(perm = var_184, x = var_183_cast_fp16)[name = string("transpose_240")];
+            tensor<fp16, [1, 12, ?, 64]> var_193_cast_fp16 = matmul(transpose_x = var_193_transpose_x_0, transpose_y = var_193_transpose_y_0, x = var_191_cast_fp16, y = v_5_cast_fp16)[name = string("op_193_cast_fp16")];
+            tensor<int32, [4]> var_194 = const()[name = string("op_194"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_17x = const()[name = string("concat_17x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_195_cast_fp16 = transpose(perm = var_194, x = var_193_cast_fp16)[name = string("transpose_237")];
+            tensor<fp16, [1, ?, 768]> x_7_cast_fp16 = reshape(shape = concat_17x, x = var_195_cast_fp16)[name = string("x_7_cast_fp16")];
+            tensor<fp16, [768, 768]> var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84301568)))];
+            tensor<fp16, [768]> var_200_to_fp16 = const()[name = string("op_200_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85481280)))];
+            tensor<fp16, [1, ?, 768]> linear_3_cast_fp16 = linear(bias = var_200_to_fp16, weight = var_199_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")];
+            tensor<int32, [1]> var_207_axes_0 = const()[name = string("op_207_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85482880)))];
+            tensor<fp16, [768]> blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85484480)))];
+            tensor<fp16, [1, ?, 768]> var_207_cast_fp16 = layer_norm(axes = var_207_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_106_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_207_cast_fp16")];
+            tensor<fp16, [768, 768]> var_216_to_fp16 = const()[name = string("op_216_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85486080)))];
+            tensor<fp16, [768]> var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86665792)))];
+            tensor<fp16, [1, ?, 768]> linear_4_cast_fp16 = linear(bias = var_217_to_fp16, weight = var_216_to_fp16, x = var_207_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor<fp16, [1, 1500, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86667392)))];
+            tensor<fp16, [1, 1500, 768]> k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_22x = const()[name = string("concat_22x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_237_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_237_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_62_to_fp16 = const()[name = string("const_62_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_7_cast_fp16 = mul(x = var_237_cast_fp16, y = const_62_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_243 = const()[name = string("op_243"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_244_cast_fp16 = reshape(shape = var_243, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_244_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_63_to_fp16 = const()[name = string("const_63_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_9_cast_fp16 = mul(x = var_244_cast_fp16, y = const_63_to_fp16)[name = string("k_9_cast_fp16")];
+            tensor<int32, [4]> var_250 = const()[name = string("op_250"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_251_cast_fp16 = reshape(shape = var_250, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_251_cast_fp16")];
+            tensor<int32, [4]> var_252 = const()[name = string("op_252"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_100 = transpose(perm = transpose_100_perm_0, x = k_9_cast_fp16)[name = string("transpose_234")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_99 = transpose(perm = transpose_99_perm_0, x = q_7_cast_fp16)[name = string("transpose_235")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_99, y = transpose_100)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_256_cast_fp16 = softmax(axis = var_100, x = qk_5_cast_fp16)[name = string("op_256_cast_fp16")];
+            bool var_258_transpose_x_0 = const()[name = string("op_258_transpose_x_0"), val = bool(false)];
+            bool var_258_transpose_y_0 = const()[name = string("op_258_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_9_cast_fp16 = transpose(perm = var_252, x = var_251_cast_fp16)[name = string("transpose_236")];
+            tensor<fp16, [1, 12, ?, 64]> var_258_cast_fp16 = matmul(transpose_x = var_258_transpose_x_0, transpose_y = var_258_transpose_y_0, x = var_256_cast_fp16, y = v_9_cast_fp16)[name = string("op_258_cast_fp16")];
+            tensor<int32, [4]> var_259 = const()[name = string("op_259"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_23x = const()[name = string("concat_23x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_260_cast_fp16 = transpose(perm = var_259, x = var_258_cast_fp16)[name = string("transpose_233")];
+            tensor<fp16, [1, ?, 768]> x_13_cast_fp16 = reshape(shape = concat_23x, x = var_260_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<fp16, [768, 768]> var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88971456)))];
+            tensor<fp16, [768]> var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90151168)))];
+            tensor<fp16, [1, ?, 768]> linear_5_cast_fp16 = linear(bias = var_265_to_fp16, weight = var_264_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")];
+            tensor<int32, [1]> var_272_axes_0 = const()[name = string("op_272_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90152768)))];
+            tensor<fp16, [768]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90154368)))];
+            tensor<fp16, [1, ?, 768]> var_272_cast_fp16 = layer_norm(axes = var_272_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_106_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_272_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_281_to_fp16 = const()[name = string("op_281_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90155968)))];
+            tensor<fp16, [3072]> var_282_to_fp16 = const()[name = string("op_282_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94874624)))];
+            tensor<fp16, [1, ?, 3072]> linear_6_cast_fp16 = linear(bias = var_282_to_fp16, weight = var_281_to_fp16, x = var_272_cast_fp16)[name = string("linear_6_cast_fp16")];
+            string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_287_to_fp16 = const()[name = string("op_287_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94880832)))];
+            tensor<fp16, [768]> var_288_to_fp16 = const()[name = string("op_288_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99599488)))];
+            tensor<fp16, [1, ?, 768]> linear_7_cast_fp16 = linear(bias = var_288_to_fp16, weight = var_287_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_24)[name = string("k_cache_5_cast_fp16")];
+            tensor<int32, [4]> v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_25)[name = string("v_cache_5_cast_fp16")];
+            tensor<int32, [4]> k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")];
+            tensor<int32, [4]> v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")];
+            int32 var_311 = const()[name = string("op_311"), val = int32(-1)];
+            tensor<int32, [1]> var_329_axes_0 = const()[name = string("op_329_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99601088)))];
+            tensor<fp16, [768]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99602688)))];
+            fp16 var_317_to_fp16 = const()[name = string("op_317_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_329_cast_fp16 = layer_norm(axes = var_329_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_317_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_329_cast_fp16")];
+            tensor<fp16, [768, 768]> var_340_to_fp16 = const()[name = string("op_340_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99604288)))];
+            tensor<fp16, [768]> var_341_to_fp16 = const()[name = string("op_341_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100784000)))];
+            tensor<fp16, [1, ?, 768]> linear_8_cast_fp16 = linear(bias = var_341_to_fp16, weight = var_340_to_fp16, x = var_329_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [768, 768]> var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100785600)))];
+            tensor<fp16, [1, ?, 768]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_344_to_fp16, x = var_329_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [768, 768]> var_348_to_fp16 = const()[name = string("op_348_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101965312)))];
+            tensor<fp16, [768]> var_349_to_fp16 = const()[name = string("op_349_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103145024)))];
+            tensor<fp16, [1, ?, 768]> linear_10_cast_fp16 = linear(bias = var_349_to_fp16, weight = var_348_to_fp16, x = var_329_cast_fp16)[name = string("linear_10_cast_fp16")];
+            tensor<int32, [3]> var_351_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_351_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_351_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_351_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_351_shape_cast_fp16_to_uint16 = cast(dtype = var_351_shape_cast_fp16_to_uint16_dtype_0, x = var_351_shape_cast_fp16)[name = string("cast_148")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_351_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_147")];
+            int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")];
+            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")];
+            tensor<int32, [1]> concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor<int32, [1]>([1])];
+            int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
+            bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_24)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_26_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_26 = read_state(input = k_cache1)[name = string("coreml_update_state_26")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_25)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_27_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_27 = read_state(input = v_cache1)[name = string("coreml_update_state_27")];
+            int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)];
+            int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(768)];
+            int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)];
+            bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")];
+            tensor<int32, [3]> var_367_begin_0 = const()[name = string("op_367_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_367_end_mask_0 = const()[name = string("op_367_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_367_cast_fp16 = slice_by_index(begin = var_367_begin_0, end = concat_32, end_mask = var_367_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_367_cast_fp16")];
+            tensor<int32, [3]> var_370_begin_0 = const()[name = string("op_370_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_370_end_mask_0 = const()[name = string("op_370_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_370_cast_fp16 = slice_by_index(begin = var_370_begin_0, end = concat_32, end_mask = var_370_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_370_cast_fp16")];
+            tensor<int32, [4]> concat_34x = const()[name = string("concat_34x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_380_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_380_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_64_to_fp16 = const()[name = string("const_64_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_11_cast_fp16 = mul(x = var_380_cast_fp16, y = const_64_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> concat_35x = const()[name = string("concat_35x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_387_cast_fp16 = reshape(shape = concat_35x, x = var_367_cast_fp16)[name = string("op_387_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_65_to_fp16 = const()[name = string("const_65_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_15_cast_fp16 = mul(x = var_387_cast_fp16, y = const_65_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> concat_36x = const()[name = string("concat_36x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_394_cast_fp16 = reshape(shape = concat_36x, x = var_370_cast_fp16)[name = string("op_394_cast_fp16")];
+            tensor<int32, [4]> var_395 = const()[name = string("op_395"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_102 = transpose(perm = transpose_102_perm_0, x = k_15_cast_fp16)[name = string("transpose_230")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_101 = transpose(perm = transpose_101_perm_0, x = q_11_cast_fp16)[name = string("transpose_231")];
+            tensor<fp16, [1, 12, ?, ?]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_101, y = transpose_102)[name = string("qk_7_cast_fp16")];
+            int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)];
+            int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)];
+            bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")];
+            tensor<int32, [2]> var_398_begin_0 = const()[name = string("op_398_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_398_end_mask_0 = const()[name = string("op_398_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_398_cast_fp16 = slice_by_index(begin = var_398_begin_0, end = concat_37, end_mask = var_398_end_mask_0, x = mask_to_fp16)[name = string("op_398_cast_fp16")];
+            int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)];
+            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
+            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")];
+            tensor<int32, [2]> var_399_begin_0 = const()[name = string("op_399_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_399_end_mask_0 = const()[name = string("op_399_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = concat_38, end_mask = var_399_end_mask_0, x = var_398_cast_fp16)[name = string("op_399_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_399_cast_fp16)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_402_cast_fp16 = softmax(axis = var_311, x = qk_9_cast_fp16)[name = string("op_402_cast_fp16")];
+            bool var_404_transpose_x_0 = const()[name = string("op_404_transpose_x_0"), val = bool(false)];
+            bool var_404_transpose_y_0 = const()[name = string("op_404_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_15_cast_fp16 = transpose(perm = var_395, x = var_394_cast_fp16)[name = string("transpose_232")];
+            tensor<fp16, [1, 12, ?, 64]> var_404_cast_fp16 = matmul(transpose_x = var_404_transpose_x_0, transpose_y = var_404_transpose_y_0, x = var_402_cast_fp16, y = v_15_cast_fp16)[name = string("op_404_cast_fp16")];
+            tensor<int32, [4]> var_405 = const()[name = string("op_405"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_39x = const()[name = string("concat_39x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_406_cast_fp16 = transpose(perm = var_405, x = var_404_cast_fp16)[name = string("transpose_229")];
+            tensor<fp16, [1, ?, 768]> x_25_cast_fp16 = reshape(shape = concat_39x, x = var_406_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<fp16, [768, 768]> var_410_to_fp16 = const()[name = string("op_410_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103146624)))];
+            tensor<fp16, [768]> var_411_to_fp16 = const()[name = string("op_411_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104326336)))];
+            tensor<fp16, [1, ?, 768]> linear_11_cast_fp16 = linear(bias = var_411_to_fp16, weight = var_410_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")];
+            tensor<int32, [1]> var_418_axes_0 = const()[name = string("op_418_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104327936)))];
+            tensor<fp16, [768]> blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104329536)))];
+            tensor<fp16, [1, ?, 768]> var_418_cast_fp16 = layer_norm(axes = var_418_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_317_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_418_cast_fp16")];
+            tensor<fp16, [768, 768]> var_427_to_fp16 = const()[name = string("op_427_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104331136)))];
+            tensor<fp16, [768]> var_428_to_fp16 = const()[name = string("op_428_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105510848)))];
+            tensor<fp16, [1, ?, 768]> linear_12_cast_fp16 = linear(bias = var_428_to_fp16, weight = var_427_to_fp16, x = var_418_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<int32, [3]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_42 = const()[name = string("concat_42"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_44x = const()[name = string("concat_44x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_448_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_448_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_66_to_fp16 = const()[name = string("const_66_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_15_cast_fp16 = mul(x = var_448_cast_fp16, y = const_66_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_454 = const()[name = string("op_454"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_455_cast_fp16 = reshape(shape = var_454, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_455_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_67_to_fp16 = const()[name = string("const_67_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_19_cast_fp16 = mul(x = var_455_cast_fp16, y = const_67_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_461 = const()[name = string("op_461"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_462_cast_fp16 = reshape(shape = var_461, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_462_cast_fp16")];
+            tensor<int32, [4]> var_463 = const()[name = string("op_463"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_104 = transpose(perm = transpose_104_perm_0, x = k_19_cast_fp16)[name = string("transpose_226")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_103 = transpose(perm = transpose_103_perm_0, x = q_15_cast_fp16)[name = string("transpose_227")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_103, y = transpose_104)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_467_cast_fp16 = softmax(axis = var_311, x = qk_11_cast_fp16)[name = string("op_467_cast_fp16")];
+            bool var_469_transpose_x_0 = const()[name = string("op_469_transpose_x_0"), val = bool(false)];
+            bool var_469_transpose_y_0 = const()[name = string("op_469_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_463, x = var_462_cast_fp16)[name = string("transpose_228")];
+            tensor<fp16, [1, 12, ?, 64]> var_469_cast_fp16 = matmul(transpose_x = var_469_transpose_x_0, transpose_y = var_469_transpose_y_0, x = var_467_cast_fp16, y = v_19_cast_fp16)[name = string("op_469_cast_fp16")];
+            tensor<int32, [4]> var_470 = const()[name = string("op_470"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_45x = const()[name = string("concat_45x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_471_cast_fp16 = transpose(perm = var_470, x = var_469_cast_fp16)[name = string("transpose_225")];
+            tensor<fp16, [1, ?, 768]> x_31_cast_fp16 = reshape(shape = concat_45x, x = var_471_cast_fp16)[name = string("x_31_cast_fp16")];
+            tensor<fp16, [768, 768]> var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105512448)))];
+            tensor<fp16, [768]> var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106692160)))];
+            tensor<fp16, [1, ?, 768]> linear_13_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")];
+            tensor<int32, [1]> var_483_axes_0 = const()[name = string("op_483_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106693760)))];
+            tensor<fp16, [768]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106695360)))];
+            tensor<fp16, [1, ?, 768]> var_483_cast_fp16 = layer_norm(axes = var_483_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_317_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_483_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106696960)))];
+            tensor<fp16, [3072]> var_493_to_fp16 = const()[name = string("op_493_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111415616)))];
+            tensor<fp16, [1, ?, 3072]> linear_14_cast_fp16 = linear(bias = var_493_to_fp16, weight = var_492_to_fp16, x = var_483_cast_fp16)[name = string("linear_14_cast_fp16")];
+            string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_498_to_fp16 = const()[name = string("op_498_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111421824)))];
+            tensor<fp16, [768]> var_499_to_fp16 = const()[name = string("op_499_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116140480)))];
+            tensor<fp16, [1, ?, 768]> linear_15_cast_fp16 = linear(bias = var_499_to_fp16, weight = var_498_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")];
+            tensor<int32, [4]> k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_26)[name = string("k_cache_9_cast_fp16")];
+            tensor<int32, [4]> v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_27)[name = string("v_cache_9_cast_fp16")];
+            tensor<int32, [4]> k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")];
+            tensor<int32, [4]> v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")];
+            int32 var_522 = const()[name = string("op_522"), val = int32(-1)];
+            tensor<int32, [1]> var_540_axes_0 = const()[name = string("op_540_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116142080)))];
+            tensor<fp16, [768]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116143680)))];
+            fp16 var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_540_cast_fp16 = layer_norm(axes = var_540_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_528_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_540_cast_fp16")];
+            tensor<fp16, [768, 768]> var_551_to_fp16 = const()[name = string("op_551_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116145280)))];
+            tensor<fp16, [768]> var_552_to_fp16 = const()[name = string("op_552_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117324992)))];
+            tensor<fp16, [1, ?, 768]> linear_16_cast_fp16 = linear(bias = var_552_to_fp16, weight = var_551_to_fp16, x = var_540_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [768, 768]> var_555_to_fp16 = const()[name = string("op_555_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117326592)))];
+            tensor<fp16, [1, ?, 768]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_555_to_fp16, x = var_540_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [768, 768]> var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118506304)))];
+            tensor<fp16, [768]> var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119686016)))];
+            tensor<fp16, [1, ?, 768]> linear_18_cast_fp16 = linear(bias = var_560_to_fp16, weight = var_559_to_fp16, x = var_540_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<int32, [3]> var_562_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_562_shape_cast_fp16")];
+            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
+            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
+            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
+            string var_562_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_562_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_562_shape_cast_fp16_to_uint16 = cast(dtype = var_562_shape_cast_fp16_to_uint16_dtype_0, x = var_562_shape_cast_fp16)[name = string("cast_146")];
+            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_562_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
+            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_145")];
+            int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")];
+            tensor<int32, [1]> expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([2])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")];
+            tensor<int32, [1]> concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)];
+            bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_26)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_28_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_28 = read_state(input = k_cache1)[name = string("coreml_update_state_28")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_27)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_29_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_29 = read_state(input = v_cache1)[name = string("coreml_update_state_29")];
+            int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)];
+            int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(768)];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")];
+            tensor<int32, [3]> var_578_begin_0 = const()[name = string("op_578_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_578_end_mask_0 = const()[name = string("op_578_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = concat_54, end_mask = var_578_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_578_cast_fp16")];
+            tensor<int32, [3]> var_581_begin_0 = const()[name = string("op_581_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_581_end_mask_0 = const()[name = string("op_581_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_581_cast_fp16 = slice_by_index(begin = var_581_begin_0, end = concat_54, end_mask = var_581_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_581_cast_fp16")];
+            tensor<int32, [4]> concat_56x = const()[name = string("concat_56x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_591_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_591_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_68_to_fp16 = const()[name = string("const_68_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_19_cast_fp16 = mul(x = var_591_cast_fp16, y = const_68_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> concat_57x = const()[name = string("concat_57x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_598_cast_fp16 = reshape(shape = concat_57x, x = var_578_cast_fp16)[name = string("op_598_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_69_to_fp16 = const()[name = string("const_69_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_25_cast_fp16 = mul(x = var_598_cast_fp16, y = const_69_to_fp16)[name = string("k_25_cast_fp16")];
+            tensor<int32, [4]> concat_58x = const()[name = string("concat_58x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_605_cast_fp16 = reshape(shape = concat_58x, x = var_581_cast_fp16)[name = string("op_605_cast_fp16")];
+            tensor<int32, [4]> var_606 = const()[name = string("op_606"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_106 = transpose(perm = transpose_106_perm_0, x = k_25_cast_fp16)[name = string("transpose_222")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_105 = transpose(perm = transpose_105_perm_0, x = q_19_cast_fp16)[name = string("transpose_223")];
+            tensor<fp16, [1, 12, ?, ?]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_105, y = transpose_106)[name = string("qk_13_cast_fp16")];
+            int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)];
+            int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)];
+            bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")];
+            tensor<int32, [2]> var_609_begin_0 = const()[name = string("op_609_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_609_end_mask_0 = const()[name = string("op_609_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_609_cast_fp16 = slice_by_index(begin = var_609_begin_0, end = concat_59, end_mask = var_609_end_mask_0, x = mask_to_fp16)[name = string("op_609_cast_fp16")];
+            int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")];
+            tensor<int32, [2]> var_610_begin_0 = const()[name = string("op_610_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_610_end_mask_0 = const()[name = string("op_610_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_610_cast_fp16 = slice_by_index(begin = var_610_begin_0, end = concat_60, end_mask = var_610_end_mask_0, x = var_609_cast_fp16)[name = string("op_610_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_610_cast_fp16)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_613_cast_fp16 = softmax(axis = var_522, x = qk_15_cast_fp16)[name = string("op_613_cast_fp16")];
+            bool var_615_transpose_x_0 = const()[name = string("op_615_transpose_x_0"), val = bool(false)];
+            bool var_615_transpose_y_0 = const()[name = string("op_615_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_25_cast_fp16 = transpose(perm = var_606, x = var_605_cast_fp16)[name = string("transpose_224")];
+            tensor<fp16, [1, 12, ?, 64]> var_615_cast_fp16 = matmul(transpose_x = var_615_transpose_x_0, transpose_y = var_615_transpose_y_0, x = var_613_cast_fp16, y = v_25_cast_fp16)[name = string("op_615_cast_fp16")];
+            tensor<int32, [4]> var_616 = const()[name = string("op_616"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_61x = const()[name = string("concat_61x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_617_cast_fp16 = transpose(perm = var_616, x = var_615_cast_fp16)[name = string("transpose_221")];
+            tensor<fp16, [1, ?, 768]> x_43_cast_fp16 = reshape(shape = concat_61x, x = var_617_cast_fp16)[name = string("x_43_cast_fp16")];
+            tensor<fp16, [768, 768]> var_621_to_fp16 = const()[name = string("op_621_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119687616)))];
+            tensor<fp16, [768]> var_622_to_fp16 = const()[name = string("op_622_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120867328)))];
+            tensor<fp16, [1, ?, 768]> linear_19_cast_fp16 = linear(bias = var_622_to_fp16, weight = var_621_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")];
+            tensor<int32, [1]> var_629_axes_0 = const()[name = string("op_629_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120868928)))];
+            tensor<fp16, [768]> blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120870528)))];
+            tensor<fp16, [1, ?, 768]> var_629_cast_fp16 = layer_norm(axes = var_629_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_528_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_629_cast_fp16")];
+            tensor<fp16, [768, 768]> var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120872128)))];
+            tensor<fp16, [768]> var_639_to_fp16 = const()[name = string("op_639_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122051840)))];
+            tensor<fp16, [1, ?, 768]> linear_20_cast_fp16 = linear(bias = var_639_to_fp16, weight = var_638_to_fp16, x = var_629_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [3]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_63 = const()[name = string("concat_63"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_64 = const()[name = string("concat_64"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_66x = const()[name = string("concat_66x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_659_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_659_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_70_to_fp16 = const()[name = string("const_70_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_23_cast_fp16 = mul(x = var_659_cast_fp16, y = const_70_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_665 = const()[name = string("op_665"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_666_cast_fp16 = reshape(shape = var_665, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_666_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_71_to_fp16 = const()[name = string("const_71_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_29_cast_fp16 = mul(x = var_666_cast_fp16, y = const_71_to_fp16)[name = string("k_29_cast_fp16")];
+            tensor<int32, [4]> var_672 = const()[name = string("op_672"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_673_cast_fp16 = reshape(shape = var_672, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_673_cast_fp16")];
+            tensor<int32, [4]> var_674 = const()[name = string("op_674"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_108_perm_0 = const()[name = string("transpose_108_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_108 = transpose(perm = transpose_108_perm_0, x = k_29_cast_fp16)[name = string("transpose_218")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_107 = transpose(perm = transpose_107_perm_0, x = q_23_cast_fp16)[name = string("transpose_219")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_107, y = transpose_108)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_678_cast_fp16 = softmax(axis = var_522, x = qk_17_cast_fp16)[name = string("op_678_cast_fp16")];
+            bool var_680_transpose_x_0 = const()[name = string("op_680_transpose_x_0"), val = bool(false)];
+            bool var_680_transpose_y_0 = const()[name = string("op_680_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_29_cast_fp16 = transpose(perm = var_674, x = var_673_cast_fp16)[name = string("transpose_220")];
+            tensor<fp16, [1, 12, ?, 64]> var_680_cast_fp16 = matmul(transpose_x = var_680_transpose_x_0, transpose_y = var_680_transpose_y_0, x = var_678_cast_fp16, y = v_29_cast_fp16)[name = string("op_680_cast_fp16")];
+            tensor<int32, [4]> var_681 = const()[name = string("op_681"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_67x = const()[name = string("concat_67x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_682_cast_fp16 = transpose(perm = var_681, x = var_680_cast_fp16)[name = string("transpose_217")];
+            tensor<fp16, [1, ?, 768]> x_49_cast_fp16 = reshape(shape = concat_67x, x = var_682_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<fp16, [768, 768]> var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122053440)))];
+            tensor<fp16, [768]> var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123233152)))];
+            tensor<fp16, [1, ?, 768]> linear_21_cast_fp16 = linear(bias = var_687_to_fp16, weight = var_686_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")];
+            tensor<int32, [1]> var_694_axes_0 = const()[name = string("op_694_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123234752)))];
+            tensor<fp16, [768]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123236352)))];
+            tensor<fp16, [1, ?, 768]> var_694_cast_fp16 = layer_norm(axes = var_694_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_528_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_694_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_703_to_fp16 = const()[name = string("op_703_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123237952)))];
+            tensor<fp16, [3072]> var_704_to_fp16 = const()[name = string("op_704_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127956608)))];
+            tensor<fp16, [1, ?, 3072]> linear_22_cast_fp16 = linear(bias = var_704_to_fp16, weight = var_703_to_fp16, x = var_694_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_709_to_fp16 = const()[name = string("op_709_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127962816)))];
+            tensor<fp16, [768]> var_710_to_fp16 = const()[name = string("op_710_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132681472)))];
+            tensor<fp16, [1, ?, 768]> linear_23_cast_fp16 = linear(bias = var_710_to_fp16, weight = var_709_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")];
+            tensor<int32, [4]> k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_28)[name = string("k_cache_13_cast_fp16")];
+            tensor<int32, [4]> v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_29)[name = string("v_cache_13_cast_fp16")];
+            tensor<int32, [4]> k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")];
+            tensor<int32, [4]> v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")];
+            int32 var_733 = const()[name = string("op_733"), val = int32(-1)];
+            tensor<int32, [1]> var_751_axes_0 = const()[name = string("op_751_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132683072)))];
+            tensor<fp16, [768]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132684672)))];
+            fp16 var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_751_cast_fp16 = layer_norm(axes = var_751_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_739_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_751_cast_fp16")];
+            tensor<fp16, [768, 768]> var_762_to_fp16 = const()[name = string("op_762_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132686272)))];
+            tensor<fp16, [768]> var_763_to_fp16 = const()[name = string("op_763_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133865984)))];
+            tensor<fp16, [1, ?, 768]> linear_24_cast_fp16 = linear(bias = var_763_to_fp16, weight = var_762_to_fp16, x = var_751_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [768, 768]> var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133867584)))];
+            tensor<fp16, [1, ?, 768]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_766_to_fp16, x = var_751_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [768, 768]> var_770_to_fp16 = const()[name = string("op_770_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135047296)))];
+            tensor<fp16, [768]> var_771_to_fp16 = const()[name = string("op_771_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136227008)))];
+            tensor<fp16, [1, ?, 768]> linear_26_cast_fp16 = linear(bias = var_771_to_fp16, weight = var_770_to_fp16, x = var_751_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [3]> var_773_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_773_shape_cast_fp16")];
+            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
+            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
+            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
+            string var_773_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_773_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_773_shape_cast_fp16_to_uint16 = cast(dtype = var_773_shape_cast_fp16_to_uint16_dtype_0, x = var_773_shape_cast_fp16)[name = string("cast_144")];
+            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_773_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
+            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_143")];
+            int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")];
+            tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")];
+            tensor<int32, [1]> concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor<int32, [1]>([3])];
+            int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)];
+            bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")];
+            tensor<int32, [1]> concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)];
+            bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_28)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_30_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_30 = read_state(input = k_cache1)[name = string("coreml_update_state_30")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_29)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_31_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_31 = read_state(input = v_cache1)[name = string("coreml_update_state_31")];
+            int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)];
+            int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(768)];
+            int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)];
+            bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")];
+            tensor<int32, [3]> var_789_begin_0 = const()[name = string("op_789_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_789_end_mask_0 = const()[name = string("op_789_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_789_cast_fp16 = slice_by_index(begin = var_789_begin_0, end = concat_76, end_mask = var_789_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_789_cast_fp16")];
+            tensor<int32, [3]> var_792_begin_0 = const()[name = string("op_792_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_792_end_mask_0 = const()[name = string("op_792_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_792_cast_fp16 = slice_by_index(begin = var_792_begin_0, end = concat_76, end_mask = var_792_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_792_cast_fp16")];
+            tensor<int32, [4]> concat_78x = const()[name = string("concat_78x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_802_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_802_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_72_to_fp16 = const()[name = string("const_72_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_27_cast_fp16 = mul(x = var_802_cast_fp16, y = const_72_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> concat_79x = const()[name = string("concat_79x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_809_cast_fp16 = reshape(shape = concat_79x, x = var_789_cast_fp16)[name = string("op_809_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_73_to_fp16 = const()[name = string("const_73_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_35_cast_fp16 = mul(x = var_809_cast_fp16, y = const_73_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> concat_80x = const()[name = string("concat_80x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_816_cast_fp16 = reshape(shape = concat_80x, x = var_792_cast_fp16)[name = string("op_816_cast_fp16")];
+            tensor<int32, [4]> var_817 = const()[name = string("op_817"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_109_perm_0 = const()[name = string("transpose_109_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_110_perm_0 = const()[name = string("transpose_110_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_110 = transpose(perm = transpose_110_perm_0, x = k_35_cast_fp16)[name = string("transpose_214")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_109 = transpose(perm = transpose_109_perm_0, x = q_27_cast_fp16)[name = string("transpose_215")];
+            tensor<fp16, [1, 12, ?, ?]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_109, y = transpose_110)[name = string("qk_19_cast_fp16")];
+            int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)];
+            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
+            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")];
+            tensor<int32, [2]> var_820_begin_0 = const()[name = string("op_820_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_820_end_mask_0 = const()[name = string("op_820_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_820_cast_fp16 = slice_by_index(begin = var_820_begin_0, end = concat_81, end_mask = var_820_end_mask_0, x = mask_to_fp16)[name = string("op_820_cast_fp16")];
+            int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)];
+            int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)];
+            bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")];
+            tensor<int32, [2]> var_821_begin_0 = const()[name = string("op_821_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_821_end_mask_0 = const()[name = string("op_821_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_821_cast_fp16 = slice_by_index(begin = var_821_begin_0, end = concat_82, end_mask = var_821_end_mask_0, x = var_820_cast_fp16)[name = string("op_821_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_821_cast_fp16)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_824_cast_fp16 = softmax(axis = var_733, x = qk_21_cast_fp16)[name = string("op_824_cast_fp16")];
+            bool var_826_transpose_x_0 = const()[name = string("op_826_transpose_x_0"), val = bool(false)];
+            bool var_826_transpose_y_0 = const()[name = string("op_826_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_35_cast_fp16 = transpose(perm = var_817, x = var_816_cast_fp16)[name = string("transpose_216")];
+            tensor<fp16, [1, 12, ?, 64]> var_826_cast_fp16 = matmul(transpose_x = var_826_transpose_x_0, transpose_y = var_826_transpose_y_0, x = var_824_cast_fp16, y = v_35_cast_fp16)[name = string("op_826_cast_fp16")];
+            tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_83x = const()[name = string("concat_83x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_828_cast_fp16 = transpose(perm = var_827, x = var_826_cast_fp16)[name = string("transpose_213")];
+            tensor<fp16, [1, ?, 768]> x_61_cast_fp16 = reshape(shape = concat_83x, x = var_828_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<fp16, [768, 768]> var_832_to_fp16 = const()[name = string("op_832_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136228608)))];
+            tensor<fp16, [768]> var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137408320)))];
+            tensor<fp16, [1, ?, 768]> linear_27_cast_fp16 = linear(bias = var_833_to_fp16, weight = var_832_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")];
+            tensor<int32, [1]> var_840_axes_0 = const()[name = string("op_840_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137409920)))];
+            tensor<fp16, [768]> blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137411520)))];
+            tensor<fp16, [1, ?, 768]> var_840_cast_fp16 = layer_norm(axes = var_840_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_739_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_840_cast_fp16")];
+            tensor<fp16, [768, 768]> var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137413120)))];
+            tensor<fp16, [768]> var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138592832)))];
+            tensor<fp16, [1, ?, 768]> linear_28_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = var_840_cast_fp16)[name = string("linear_28_cast_fp16")];
+            tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_87 = const()[name = string("concat_87"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_88x = const()[name = string("concat_88x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_870_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_870_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_74_to_fp16 = const()[name = string("const_74_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_31_cast_fp16 = mul(x = var_870_cast_fp16, y = const_74_to_fp16)[name = string("q_31_cast_fp16")];
+            tensor<int32, [4]> var_876 = const()[name = string("op_876"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_877_cast_fp16 = reshape(shape = var_876, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_877_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_39_cast_fp16 = mul(x = var_877_cast_fp16, y = const_75_to_fp16)[name = string("k_39_cast_fp16")];
+            tensor<int32, [4]> var_883 = const()[name = string("op_883"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_884_cast_fp16 = reshape(shape = var_883, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_884_cast_fp16")];
+            tensor<int32, [4]> var_885 = const()[name = string("op_885"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)];
+            bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_111_perm_0 = const()[name = string("transpose_111_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_112_perm_0 = const()[name = string("transpose_112_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_112 = transpose(perm = transpose_112_perm_0, x = k_39_cast_fp16)[name = string("transpose_210")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_111 = transpose(perm = transpose_111_perm_0, x = q_31_cast_fp16)[name = string("transpose_211")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_111, y = transpose_112)[name = string("qk_23_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_889_cast_fp16 = softmax(axis = var_733, x = qk_23_cast_fp16)[name = string("op_889_cast_fp16")];
+            bool var_891_transpose_x_0 = const()[name = string("op_891_transpose_x_0"), val = bool(false)];
+            bool var_891_transpose_y_0 = const()[name = string("op_891_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_885, x = var_884_cast_fp16)[name = string("transpose_212")];
+            tensor<fp16, [1, 12, ?, 64]> var_891_cast_fp16 = matmul(transpose_x = var_891_transpose_x_0, transpose_y = var_891_transpose_y_0, x = var_889_cast_fp16, y = v_39_cast_fp16)[name = string("op_891_cast_fp16")];
+            tensor<int32, [4]> var_892 = const()[name = string("op_892"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_89x = const()[name = string("concat_89x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_893_cast_fp16 = transpose(perm = var_892, x = var_891_cast_fp16)[name = string("transpose_209")];
+            tensor<fp16, [1, ?, 768]> x_67_cast_fp16 = reshape(shape = concat_89x, x = var_893_cast_fp16)[name = string("x_67_cast_fp16")];
+            tensor<fp16, [768, 768]> var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138594432)))];
+            tensor<fp16, [768]> var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139774144)))];
+            tensor<fp16, [1, ?, 768]> linear_29_cast_fp16 = linear(bias = var_898_to_fp16, weight = var_897_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")];
+            tensor<int32, [1]> var_905_axes_0 = const()[name = string("op_905_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139775744)))];
+            tensor<fp16, [768]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139777344)))];
+            tensor<fp16, [1, ?, 768]> var_905_cast_fp16 = layer_norm(axes = var_905_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_739_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_905_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_914_to_fp16 = const()[name = string("op_914_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139778944)))];
+            tensor<fp16, [3072]> var_915_to_fp16 = const()[name = string("op_915_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144497600)))];
+            tensor<fp16, [1, ?, 3072]> linear_30_cast_fp16 = linear(bias = var_915_to_fp16, weight = var_914_to_fp16, x = var_905_cast_fp16)[name = string("linear_30_cast_fp16")];
+            string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_920_to_fp16 = const()[name = string("op_920_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144503808)))];
+            tensor<fp16, [768]> var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149222464)))];
+            tensor<fp16, [1, ?, 768]> linear_31_cast_fp16 = linear(bias = var_921_to_fp16, weight = var_920_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")];
+            tensor<int32, [4]> k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_30)[name = string("k_cache_17_cast_fp16")];
+            tensor<int32, [4]> v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor<int32, [4]>([5, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_31)[name = string("v_cache_17_cast_fp16")];
+            tensor<int32, [4]> k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")];
+            tensor<int32, [4]> v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor<int32, [4]>([5, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")];
+            int32 var_944 = const()[name = string("op_944"), val = int32(-1)];
+            tensor<int32, [1]> var_962_axes_0 = const()[name = string("op_962_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149224064)))];
+            tensor<fp16, [768]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149225664)))];
+            fp16 var_950_to_fp16 = const()[name = string("op_950_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_962_cast_fp16 = layer_norm(axes = var_962_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_950_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_962_cast_fp16")];
+            tensor<fp16, [768, 768]> var_973_to_fp16 = const()[name = string("op_973_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149227264)))];
+            tensor<fp16, [768]> var_974_to_fp16 = const()[name = string("op_974_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150406976)))];
+            tensor<fp16, [1, ?, 768]> linear_32_cast_fp16 = linear(bias = var_974_to_fp16, weight = var_973_to_fp16, x = var_962_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<fp16, [768, 768]> var_977_to_fp16 = const()[name = string("op_977_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150408576)))];
+            tensor<fp16, [1, ?, 768]> linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_977_to_fp16, x = var_962_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [768, 768]> var_981_to_fp16 = const()[name = string("op_981_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151588288)))];
+            tensor<fp16, [768]> var_982_to_fp16 = const()[name = string("op_982_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152768000)))];
+            tensor<fp16, [1, ?, 768]> linear_34_cast_fp16 = linear(bias = var_982_to_fp16, weight = var_981_to_fp16, x = var_962_cast_fp16)[name = string("linear_34_cast_fp16")];
+            tensor<int32, [3]> var_984_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_984_shape_cast_fp16")];
+            int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)];
+            int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)];
+            bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)];
+            string var_984_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_984_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_984_shape_cast_fp16_to_uint16 = cast(dtype = var_984_shape_cast_fp16_to_uint16_dtype_0, x = var_984_shape_cast_fp16)[name = string("cast_142")];
+            uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_984_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")];
+            string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_141")];
+            int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")];
+            tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")];
+            tensor<int32, [1]> concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor<int32, [1]>([4])];
+            int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)];
+            bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")];
+            tensor<int32, [1]> concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
+            bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_30)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_32_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_32 = read_state(input = k_cache1)[name = string("coreml_update_state_32")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_31)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_33_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_33 = read_state(input = v_cache1)[name = string("coreml_update_state_33")];
+            int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)];
+            int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(768)];
+            int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)];
+            bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")];
+            tensor<int32, [3]> var_1000_begin_0 = const()[name = string("op_1000_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1000_end_mask_0 = const()[name = string("op_1000_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = concat_98, end_mask = var_1000_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1000_cast_fp16")];
+            tensor<int32, [3]> var_1003_begin_0 = const()[name = string("op_1003_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1003_end_mask_0 = const()[name = string("op_1003_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1003_cast_fp16 = slice_by_index(begin = var_1003_begin_0, end = concat_98, end_mask = var_1003_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1003_cast_fp16")];
+            tensor<int32, [4]> concat_100x = const()[name = string("concat_100x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1013_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1013_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_35_cast_fp16 = mul(x = var_1013_cast_fp16, y = const_76_to_fp16)[name = string("q_35_cast_fp16")];
+            tensor<int32, [4]> concat_101x = const()[name = string("concat_101x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1020_cast_fp16 = reshape(shape = concat_101x, x = var_1000_cast_fp16)[name = string("op_1020_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_77_to_fp16 = const()[name = string("const_77_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_45_cast_fp16 = mul(x = var_1020_cast_fp16, y = const_77_to_fp16)[name = string("k_45_cast_fp16")];
+            tensor<int32, [4]> concat_102x = const()[name = string("concat_102x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1027_cast_fp16 = reshape(shape = concat_102x, x = var_1003_cast_fp16)[name = string("op_1027_cast_fp16")];
+            tensor<int32, [4]> var_1028 = const()[name = string("op_1028"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)];
+            bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_113_perm_0 = const()[name = string("transpose_113_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_114_perm_0 = const()[name = string("transpose_114_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_114 = transpose(perm = transpose_114_perm_0, x = k_45_cast_fp16)[name = string("transpose_206")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_113 = transpose(perm = transpose_113_perm_0, x = q_35_cast_fp16)[name = string("transpose_207")];
+            tensor<fp16, [1, 12, ?, ?]> qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_113, y = transpose_114)[name = string("qk_25_cast_fp16")];
+            int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)];
+            int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)];
+            bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")];
+            tensor<int32, [2]> var_1031_begin_0 = const()[name = string("op_1031_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1031_end_mask_0 = const()[name = string("op_1031_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1031_cast_fp16 = slice_by_index(begin = var_1031_begin_0, end = concat_103, end_mask = var_1031_end_mask_0, x = mask_to_fp16)[name = string("op_1031_cast_fp16")];
+            int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)];
+            int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)];
+            bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")];
+            tensor<int32, [2]> var_1032_begin_0 = const()[name = string("op_1032_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1032_end_mask_0 = const()[name = string("op_1032_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1032_cast_fp16 = slice_by_index(begin = var_1032_begin_0, end = concat_104, end_mask = var_1032_end_mask_0, x = var_1031_cast_fp16)[name = string("op_1032_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1032_cast_fp16)[name = string("qk_27_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_1035_cast_fp16 = softmax(axis = var_944, x = qk_27_cast_fp16)[name = string("op_1035_cast_fp16")];
+            bool var_1037_transpose_x_0 = const()[name = string("op_1037_transpose_x_0"), val = bool(false)];
+            bool var_1037_transpose_y_0 = const()[name = string("op_1037_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_45_cast_fp16 = transpose(perm = var_1028, x = var_1027_cast_fp16)[name = string("transpose_208")];
+            tensor<fp16, [1, 12, ?, 64]> var_1037_cast_fp16 = matmul(transpose_x = var_1037_transpose_x_0, transpose_y = var_1037_transpose_y_0, x = var_1035_cast_fp16, y = v_45_cast_fp16)[name = string("op_1037_cast_fp16")];
+            tensor<int32, [4]> var_1038 = const()[name = string("op_1038"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_105x = const()[name = string("concat_105x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1039_cast_fp16 = transpose(perm = var_1038, x = var_1037_cast_fp16)[name = string("transpose_205")];
+            tensor<fp16, [1, ?, 768]> x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1039_cast_fp16)[name = string("x_79_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1043_to_fp16 = const()[name = string("op_1043_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152769600)))];
+            tensor<fp16, [768]> var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153949312)))];
+            tensor<fp16, [1, ?, 768]> linear_35_cast_fp16 = linear(bias = var_1044_to_fp16, weight = var_1043_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")];
+            tensor<int32, [1]> var_1051_axes_0 = const()[name = string("op_1051_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153950912)))];
+            tensor<fp16, [768]> blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153952512)))];
+            tensor<fp16, [1, ?, 768]> var_1051_cast_fp16 = layer_norm(axes = var_1051_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_950_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1051_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1060_to_fp16 = const()[name = string("op_1060_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153954112)))];
+            tensor<fp16, [768]> var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155133824)))];
+            tensor<fp16, [1, ?, 768]> linear_36_cast_fp16 = linear(bias = var_1061_to_fp16, weight = var_1060_to_fp16, x = var_1051_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<int32, [3]> concat_106 = const()[name = string("concat_106"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_108 = const()[name = string("concat_108"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_109 = const()[name = string("concat_109"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_110x = const()[name = string("concat_110x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1081_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1081_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_78_to_fp16 = const()[name = string("const_78_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_39_cast_fp16 = mul(x = var_1081_cast_fp16, y = const_78_to_fp16)[name = string("q_39_cast_fp16")];
+            tensor<int32, [4]> var_1087 = const()[name = string("op_1087"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1088_cast_fp16 = reshape(shape = var_1087, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1088_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_79_to_fp16 = const()[name = string("const_79_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_49_cast_fp16 = mul(x = var_1088_cast_fp16, y = const_79_to_fp16)[name = string("k_49_cast_fp16")];
+            tensor<int32, [4]> var_1094 = const()[name = string("op_1094"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1095_cast_fp16 = reshape(shape = var_1094, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1095_cast_fp16")];
+            tensor<int32, [4]> var_1096 = const()[name = string("op_1096"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)];
+            bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_115_perm_0 = const()[name = string("transpose_115_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_116_perm_0 = const()[name = string("transpose_116_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_116 = transpose(perm = transpose_116_perm_0, x = k_49_cast_fp16)[name = string("transpose_202")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_115 = transpose(perm = transpose_115_perm_0, x = q_39_cast_fp16)[name = string("transpose_203")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_115, y = transpose_116)[name = string("qk_29_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_1100_cast_fp16 = softmax(axis = var_944, x = qk_29_cast_fp16)[name = string("op_1100_cast_fp16")];
+            bool var_1102_transpose_x_0 = const()[name = string("op_1102_transpose_x_0"), val = bool(false)];
+            bool var_1102_transpose_y_0 = const()[name = string("op_1102_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_49_cast_fp16 = transpose(perm = var_1096, x = var_1095_cast_fp16)[name = string("transpose_204")];
+            tensor<fp16, [1, 12, ?, 64]> var_1102_cast_fp16 = matmul(transpose_x = var_1102_transpose_x_0, transpose_y = var_1102_transpose_y_0, x = var_1100_cast_fp16, y = v_49_cast_fp16)[name = string("op_1102_cast_fp16")];
+            tensor<int32, [4]> var_1103 = const()[name = string("op_1103"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_111x = const()[name = string("concat_111x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1104_cast_fp16 = transpose(perm = var_1103, x = var_1102_cast_fp16)[name = string("transpose_201")];
+            tensor<fp16, [1, ?, 768]> x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1104_cast_fp16)[name = string("x_85_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155135424)))];
+            tensor<fp16, [768]> var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156315136)))];
+            tensor<fp16, [1, ?, 768]> linear_37_cast_fp16 = linear(bias = var_1109_to_fp16, weight = var_1108_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")];
+            tensor<int32, [1]> var_1116_axes_0 = const()[name = string("op_1116_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156316736)))];
+            tensor<fp16, [768]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156318336)))];
+            tensor<fp16, [1, ?, 768]> var_1116_cast_fp16 = layer_norm(axes = var_1116_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_950_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1116_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_1125_to_fp16 = const()[name = string("op_1125_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156319936)))];
+            tensor<fp16, [3072]> var_1126_to_fp16 = const()[name = string("op_1126_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161038592)))];
+            tensor<fp16, [1, ?, 3072]> linear_38_cast_fp16 = linear(bias = var_1126_to_fp16, weight = var_1125_to_fp16, x = var_1116_cast_fp16)[name = string("linear_38_cast_fp16")];
+            string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_1131_to_fp16 = const()[name = string("op_1131_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161044800)))];
+            tensor<fp16, [768]> var_1132_to_fp16 = const()[name = string("op_1132_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165763456)))];
+            tensor<fp16, [1, ?, 768]> linear_39_cast_fp16 = linear(bias = var_1132_to_fp16, weight = var_1131_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")];
+            tensor<int32, [4]> k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_32)[name = string("k_cache_21_cast_fp16")];
+            tensor<int32, [4]> v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor<int32, [4]>([6, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_33)[name = string("v_cache_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")];
+            tensor<int32, [4]> v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor<int32, [4]>([6, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")];
+            int32 var_1155 = const()[name = string("op_1155"), val = int32(-1)];
+            tensor<int32, [1]> var_1173_axes_0 = const()[name = string("op_1173_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165765056)))];
+            tensor<fp16, [768]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165766656)))];
+            fp16 var_1161_to_fp16 = const()[name = string("op_1161_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_1173_cast_fp16 = layer_norm(axes = var_1173_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1161_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1173_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1184_to_fp16 = const()[name = string("op_1184_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165768256)))];
+            tensor<fp16, [768]> var_1185_to_fp16 = const()[name = string("op_1185_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166947968)))];
+            tensor<fp16, [1, ?, 768]> linear_40_cast_fp16 = linear(bias = var_1185_to_fp16, weight = var_1184_to_fp16, x = var_1173_cast_fp16)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166949568)))];
+            tensor<fp16, [1, ?, 768]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1188_to_fp16, x = var_1173_cast_fp16)[name = string("linear_41_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168129280)))];
+            tensor<fp16, [768]> var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169308992)))];
+            tensor<fp16, [1, ?, 768]> linear_42_cast_fp16 = linear(bias = var_1193_to_fp16, weight = var_1192_to_fp16, x = var_1173_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<int32, [3]> var_1195_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1195_shape_cast_fp16")];
+            int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)];
+            int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)];
+            bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)];
+            string var_1195_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1195_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1195_shape_cast_fp16_to_uint16 = cast(dtype = var_1195_shape_cast_fp16_to_uint16_dtype_0, x = var_1195_shape_cast_fp16)[name = string("cast_140")];
+            uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1195_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")];
+            string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_139")];
+            int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")];
+            tensor<int32, [1]> expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")];
+            tensor<int32, [1]> concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor<int32, [1]>([5])];
+            int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
+            bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")];
+            tensor<int32, [1]> concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)];
+            bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_32)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_34_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_34 = read_state(input = k_cache1)[name = string("coreml_update_state_34")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_33)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_35_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_35 = read_state(input = v_cache1)[name = string("coreml_update_state_35")];
+            int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)];
+            int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(768)];
+            int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)];
+            bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")];
+            tensor<int32, [3]> var_1211_begin_0 = const()[name = string("op_1211_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1211_end_mask_0 = const()[name = string("op_1211_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1211_cast_fp16 = slice_by_index(begin = var_1211_begin_0, end = concat_120, end_mask = var_1211_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1211_cast_fp16")];
+            tensor<int32, [3]> var_1214_begin_0 = const()[name = string("op_1214_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1214_end_mask_0 = const()[name = string("op_1214_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = concat_120, end_mask = var_1214_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1214_cast_fp16")];
+            tensor<int32, [4]> concat_122x = const()[name = string("concat_122x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1224_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1224_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_80_to_fp16 = const()[name = string("const_80_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_43_cast_fp16 = mul(x = var_1224_cast_fp16, y = const_80_to_fp16)[name = string("q_43_cast_fp16")];
+            tensor<int32, [4]> concat_123x = const()[name = string("concat_123x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1231_cast_fp16 = reshape(shape = concat_123x, x = var_1211_cast_fp16)[name = string("op_1231_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_81_to_fp16 = const()[name = string("const_81_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_55_cast_fp16 = mul(x = var_1231_cast_fp16, y = const_81_to_fp16)[name = string("k_55_cast_fp16")];
+            tensor<int32, [4]> concat_124x = const()[name = string("concat_124x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1238_cast_fp16 = reshape(shape = concat_124x, x = var_1214_cast_fp16)[name = string("op_1238_cast_fp16")];
+            tensor<int32, [4]> var_1239 = const()[name = string("op_1239"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)];
+            bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_117_perm_0 = const()[name = string("transpose_117_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_118_perm_0 = const()[name = string("transpose_118_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_118 = transpose(perm = transpose_118_perm_0, x = k_55_cast_fp16)[name = string("transpose_198")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_117 = transpose(perm = transpose_117_perm_0, x = q_43_cast_fp16)[name = string("transpose_199")];
+            tensor<fp16, [1, 12, ?, ?]> qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_117, y = transpose_118)[name = string("qk_31_cast_fp16")];
+            int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)];
+            int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)];
+            bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")];
+            tensor<int32, [2]> var_1242_begin_0 = const()[name = string("op_1242_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1242_end_mask_0 = const()[name = string("op_1242_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = concat_125, end_mask = var_1242_end_mask_0, x = mask_to_fp16)[name = string("op_1242_cast_fp16")];
+            int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)];
+            int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)];
+            bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")];
+            tensor<int32, [2]> var_1243_begin_0 = const()[name = string("op_1243_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1243_end_mask_0 = const()[name = string("op_1243_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1243_cast_fp16 = slice_by_index(begin = var_1243_begin_0, end = concat_126, end_mask = var_1243_end_mask_0, x = var_1242_cast_fp16)[name = string("op_1243_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1243_cast_fp16)[name = string("qk_33_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_1246_cast_fp16 = softmax(axis = var_1155, x = qk_33_cast_fp16)[name = string("op_1246_cast_fp16")];
+            bool var_1248_transpose_x_0 = const()[name = string("op_1248_transpose_x_0"), val = bool(false)];
+            bool var_1248_transpose_y_0 = const()[name = string("op_1248_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_55_cast_fp16 = transpose(perm = var_1239, x = var_1238_cast_fp16)[name = string("transpose_200")];
+            tensor<fp16, [1, 12, ?, 64]> var_1248_cast_fp16 = matmul(transpose_x = var_1248_transpose_x_0, transpose_y = var_1248_transpose_y_0, x = var_1246_cast_fp16, y = v_55_cast_fp16)[name = string("op_1248_cast_fp16")];
+            tensor<int32, [4]> var_1249 = const()[name = string("op_1249"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_127x = const()[name = string("concat_127x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1250_cast_fp16 = transpose(perm = var_1249, x = var_1248_cast_fp16)[name = string("transpose_197")];
+            tensor<fp16, [1, ?, 768]> x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1250_cast_fp16)[name = string("x_97_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1254_to_fp16 = const()[name = string("op_1254_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169310592)))];
+            tensor<fp16, [768]> var_1255_to_fp16 = const()[name = string("op_1255_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170490304)))];
+            tensor<fp16, [1, ?, 768]> linear_43_cast_fp16 = linear(bias = var_1255_to_fp16, weight = var_1254_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")];
+            tensor<int32, [1]> var_1262_axes_0 = const()[name = string("op_1262_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170491904)))];
+            tensor<fp16, [768]> blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170493504)))];
+            tensor<fp16, [1, ?, 768]> var_1262_cast_fp16 = layer_norm(axes = var_1262_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1161_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1262_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1271_to_fp16 = const()[name = string("op_1271_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170495104)))];
+            tensor<fp16, [768]> var_1272_to_fp16 = const()[name = string("op_1272_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171674816)))];
+            tensor<fp16, [1, ?, 768]> linear_44_cast_fp16 = linear(bias = var_1272_to_fp16, weight = var_1271_to_fp16, x = var_1262_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [3]> concat_128 = const()[name = string("concat_128"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_129 = const()[name = string("concat_129"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_130 = const()[name = string("concat_130"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_131 = const()[name = string("concat_131"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_132x = const()[name = string("concat_132x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1292_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1292_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_82_to_fp16 = const()[name = string("const_82_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_47_cast_fp16 = mul(x = var_1292_cast_fp16, y = const_82_to_fp16)[name = string("q_47_cast_fp16")];
+            tensor<int32, [4]> var_1298 = const()[name = string("op_1298"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1299_cast_fp16 = reshape(shape = var_1298, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1299_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_83_to_fp16 = const()[name = string("const_83_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_59_cast_fp16 = mul(x = var_1299_cast_fp16, y = const_83_to_fp16)[name = string("k_59_cast_fp16")];
+            tensor<int32, [4]> var_1305 = const()[name = string("op_1305"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1306_cast_fp16 = reshape(shape = var_1305, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1306_cast_fp16")];
+            tensor<int32, [4]> var_1307 = const()[name = string("op_1307"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)];
+            bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_119_perm_0 = const()[name = string("transpose_119_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_120_perm_0 = const()[name = string("transpose_120_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_120 = transpose(perm = transpose_120_perm_0, x = k_59_cast_fp16)[name = string("transpose_194")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_119 = transpose(perm = transpose_119_perm_0, x = q_47_cast_fp16)[name = string("transpose_195")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_119, y = transpose_120)[name = string("qk_35_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_1311_cast_fp16 = softmax(axis = var_1155, x = qk_35_cast_fp16)[name = string("op_1311_cast_fp16")];
+            bool var_1313_transpose_x_0 = const()[name = string("op_1313_transpose_x_0"), val = bool(false)];
+            bool var_1313_transpose_y_0 = const()[name = string("op_1313_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_59_cast_fp16 = transpose(perm = var_1307, x = var_1306_cast_fp16)[name = string("transpose_196")];
+            tensor<fp16, [1, 12, ?, 64]> var_1313_cast_fp16 = matmul(transpose_x = var_1313_transpose_x_0, transpose_y = var_1313_transpose_y_0, x = var_1311_cast_fp16, y = v_59_cast_fp16)[name = string("op_1313_cast_fp16")];
+            tensor<int32, [4]> var_1314 = const()[name = string("op_1314"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_133x = const()[name = string("concat_133x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1315_cast_fp16 = transpose(perm = var_1314, x = var_1313_cast_fp16)[name = string("transpose_193")];
+            tensor<fp16, [1, ?, 768]> x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1315_cast_fp16)[name = string("x_103_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171676416)))];
+            tensor<fp16, [768]> var_1320_to_fp16 = const()[name = string("op_1320_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172856128)))];
+            tensor<fp16, [1, ?, 768]> linear_45_cast_fp16 = linear(bias = var_1320_to_fp16, weight = var_1319_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")];
+            tensor<int32, [1]> var_1327_axes_0 = const()[name = string("op_1327_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172857728)))];
+            tensor<fp16, [768]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172859328)))];
+            tensor<fp16, [1, ?, 768]> var_1327_cast_fp16 = layer_norm(axes = var_1327_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1161_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1327_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_1336_to_fp16 = const()[name = string("op_1336_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172860928)))];
+            tensor<fp16, [3072]> var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177579584)))];
+            tensor<fp16, [1, ?, 3072]> linear_46_cast_fp16 = linear(bias = var_1337_to_fp16, weight = var_1336_to_fp16, x = var_1327_cast_fp16)[name = string("linear_46_cast_fp16")];
+            string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_1342_to_fp16 = const()[name = string("op_1342_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177585792)))];
+            tensor<fp16, [768]> var_1343_to_fp16 = const()[name = string("op_1343_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182304448)))];
+            tensor<fp16, [1, ?, 768]> linear_47_cast_fp16 = linear(bias = var_1343_to_fp16, weight = var_1342_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")];
+            tensor<int32, [4]> k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor<int32, [4]>([7, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_34)[name = string("k_cache_25_cast_fp16")];
+            tensor<int32, [4]> v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor<int32, [4]>([7, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_35)[name = string("v_cache_25_cast_fp16")];
+            tensor<int32, [4]> k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor<int32, [4]>([7, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")];
+            tensor<int32, [4]> v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor<int32, [4]>([7, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")];
+            int32 var_1366 = const()[name = string("op_1366"), val = int32(-1)];
+            tensor<int32, [1]> var_1384_axes_0 = const()[name = string("op_1384_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182306048)))];
+            tensor<fp16, [768]> blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182307648)))];
+            fp16 var_1372_to_fp16 = const()[name = string("op_1372_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_1384_cast_fp16 = layer_norm(axes = var_1384_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1372_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1384_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1395_to_fp16 = const()[name = string("op_1395_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182309248)))];
+            tensor<fp16, [768]> var_1396_to_fp16 = const()[name = string("op_1396_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183488960)))];
+            tensor<fp16, [1, ?, 768]> linear_48_cast_fp16 = linear(bias = var_1396_to_fp16, weight = var_1395_to_fp16, x = var_1384_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1399_to_fp16 = const()[name = string("op_1399_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183490560)))];
+            tensor<fp16, [1, ?, 768]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1399_to_fp16, x = var_1384_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1403_to_fp16 = const()[name = string("op_1403_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184670272)))];
+            tensor<fp16, [768]> var_1404_to_fp16 = const()[name = string("op_1404_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185849984)))];
+            tensor<fp16, [1, ?, 768]> linear_50_cast_fp16 = linear(bias = var_1404_to_fp16, weight = var_1403_to_fp16, x = var_1384_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [3]> var_1406_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1406_shape_cast_fp16")];
+            int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)];
+            int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)];
+            bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)];
+            string var_1406_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1406_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1406_shape_cast_fp16_to_uint16 = cast(dtype = var_1406_shape_cast_fp16_to_uint16_dtype_0, x = var_1406_shape_cast_fp16)[name = string("cast_138")];
+            uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1406_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")];
+            string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_137")];
+            int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")];
+            tensor<int32, [1]> expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")];
+            tensor<int32, [1]> concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor<int32, [1]>([6])];
+            int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)];
+            bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")];
+            tensor<int32, [1]> concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)];
+            bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_34)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_36_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_36 = read_state(input = k_cache1)[name = string("coreml_update_state_36")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_35)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_37_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_37 = read_state(input = v_cache1)[name = string("coreml_update_state_37")];
+            int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)];
+            int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(768)];
+            int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)];
+            bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")];
+            tensor<int32, [3]> var_1422_begin_0 = const()[name = string("op_1422_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1422_end_mask_0 = const()[name = string("op_1422_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = concat_142, end_mask = var_1422_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1422_cast_fp16")];
+            tensor<int32, [3]> var_1425_begin_0 = const()[name = string("op_1425_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1425_end_mask_0 = const()[name = string("op_1425_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1425_cast_fp16 = slice_by_index(begin = var_1425_begin_0, end = concat_142, end_mask = var_1425_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1425_cast_fp16")];
+            tensor<int32, [4]> concat_144x = const()[name = string("concat_144x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1435_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1435_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_84_to_fp16 = const()[name = string("const_84_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_51_cast_fp16 = mul(x = var_1435_cast_fp16, y = const_84_to_fp16)[name = string("q_51_cast_fp16")];
+            tensor<int32, [4]> concat_145x = const()[name = string("concat_145x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1442_cast_fp16 = reshape(shape = concat_145x, x = var_1422_cast_fp16)[name = string("op_1442_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_85_to_fp16 = const()[name = string("const_85_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_65_cast_fp16 = mul(x = var_1442_cast_fp16, y = const_85_to_fp16)[name = string("k_65_cast_fp16")];
+            tensor<int32, [4]> concat_146x = const()[name = string("concat_146x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1449_cast_fp16 = reshape(shape = concat_146x, x = var_1425_cast_fp16)[name = string("op_1449_cast_fp16")];
+            tensor<int32, [4]> var_1450 = const()[name = string("op_1450"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)];
+            bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_121_perm_0 = const()[name = string("transpose_121_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_122_perm_0 = const()[name = string("transpose_122_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_122 = transpose(perm = transpose_122_perm_0, x = k_65_cast_fp16)[name = string("transpose_190")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_121 = transpose(perm = transpose_121_perm_0, x = q_51_cast_fp16)[name = string("transpose_191")];
+            tensor<fp16, [1, 12, ?, ?]> qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_121, y = transpose_122)[name = string("qk_37_cast_fp16")];
+            int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)];
+            int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)];
+            bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")];
+            tensor<int32, [2]> var_1453_begin_0 = const()[name = string("op_1453_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1453_end_mask_0 = const()[name = string("op_1453_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1453_cast_fp16 = slice_by_index(begin = var_1453_begin_0, end = concat_147, end_mask = var_1453_end_mask_0, x = mask_to_fp16)[name = string("op_1453_cast_fp16")];
+            int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)];
+            int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)];
+            bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")];
+            tensor<int32, [2]> var_1454_begin_0 = const()[name = string("op_1454_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1454_end_mask_0 = const()[name = string("op_1454_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = concat_148, end_mask = var_1454_end_mask_0, x = var_1453_cast_fp16)[name = string("op_1454_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1454_cast_fp16)[name = string("qk_39_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_1457_cast_fp16 = softmax(axis = var_1366, x = qk_39_cast_fp16)[name = string("op_1457_cast_fp16")];
+            bool var_1459_transpose_x_0 = const()[name = string("op_1459_transpose_x_0"), val = bool(false)];
+            bool var_1459_transpose_y_0 = const()[name = string("op_1459_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_65_cast_fp16 = transpose(perm = var_1450, x = var_1449_cast_fp16)[name = string("transpose_192")];
+            tensor<fp16, [1, 12, ?, 64]> var_1459_cast_fp16 = matmul(transpose_x = var_1459_transpose_x_0, transpose_y = var_1459_transpose_y_0, x = var_1457_cast_fp16, y = v_65_cast_fp16)[name = string("op_1459_cast_fp16")];
+            tensor<int32, [4]> var_1460 = const()[name = string("op_1460"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_149x = const()[name = string("concat_149x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1461_cast_fp16 = transpose(perm = var_1460, x = var_1459_cast_fp16)[name = string("transpose_189")];
+            tensor<fp16, [1, ?, 768]> x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1461_cast_fp16)[name = string("x_115_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185851584)))];
+            tensor<fp16, [768]> var_1466_to_fp16 = const()[name = string("op_1466_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187031296)))];
+            tensor<fp16, [1, ?, 768]> linear_51_cast_fp16 = linear(bias = var_1466_to_fp16, weight = var_1465_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")];
+            tensor<int32, [1]> var_1473_axes_0 = const()[name = string("op_1473_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187032896)))];
+            tensor<fp16, [768]> blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187034496)))];
+            tensor<fp16, [1, ?, 768]> var_1473_cast_fp16 = layer_norm(axes = var_1473_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1372_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1473_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1482_to_fp16 = const()[name = string("op_1482_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187036096)))];
+            tensor<fp16, [768]> var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188215808)))];
+            tensor<fp16, [1, ?, 768]> linear_52_cast_fp16 = linear(bias = var_1483_to_fp16, weight = var_1482_to_fp16, x = var_1473_cast_fp16)[name = string("linear_52_cast_fp16")];
+            tensor<int32, [3]> concat_150 = const()[name = string("concat_150"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_151 = const()[name = string("concat_151"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_152 = const()[name = string("concat_152"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_153 = const()[name = string("concat_153"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_154x = const()[name = string("concat_154x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1503_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1503_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_86_to_fp16 = const()[name = string("const_86_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_55_cast_fp16 = mul(x = var_1503_cast_fp16, y = const_86_to_fp16)[name = string("q_55_cast_fp16")];
+            tensor<int32, [4]> var_1509 = const()[name = string("op_1509"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1510_cast_fp16 = reshape(shape = var_1509, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1510_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_69_cast_fp16 = mul(x = var_1510_cast_fp16, y = const_87_to_fp16)[name = string("k_69_cast_fp16")];
+            tensor<int32, [4]> var_1516 = const()[name = string("op_1516"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1517_cast_fp16 = reshape(shape = var_1516, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1517_cast_fp16")];
+            tensor<int32, [4]> var_1518 = const()[name = string("op_1518"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)];
+            bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_123_perm_0 = const()[name = string("transpose_123_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_124_perm_0 = const()[name = string("transpose_124_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_124 = transpose(perm = transpose_124_perm_0, x = k_69_cast_fp16)[name = string("transpose_186")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_123 = transpose(perm = transpose_123_perm_0, x = q_55_cast_fp16)[name = string("transpose_187")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_123, y = transpose_124)[name = string("qk_41_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_1522_cast_fp16 = softmax(axis = var_1366, x = qk_41_cast_fp16)[name = string("op_1522_cast_fp16")];
+            bool var_1524_transpose_x_0 = const()[name = string("op_1524_transpose_x_0"), val = bool(false)];
+            bool var_1524_transpose_y_0 = const()[name = string("op_1524_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_69_cast_fp16 = transpose(perm = var_1518, x = var_1517_cast_fp16)[name = string("transpose_188")];
+            tensor<fp16, [1, 12, ?, 64]> var_1524_cast_fp16 = matmul(transpose_x = var_1524_transpose_x_0, transpose_y = var_1524_transpose_y_0, x = var_1522_cast_fp16, y = v_69_cast_fp16)[name = string("op_1524_cast_fp16")];
+            tensor<int32, [4]> var_1525 = const()[name = string("op_1525"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_155x = const()[name = string("concat_155x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1526_cast_fp16 = transpose(perm = var_1525, x = var_1524_cast_fp16)[name = string("transpose_185")];
+            tensor<fp16, [1, ?, 768]> x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1526_cast_fp16)[name = string("x_121_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188217408)))];
+            tensor<fp16, [768]> var_1531_to_fp16 = const()[name = string("op_1531_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189397120)))];
+            tensor<fp16, [1, ?, 768]> linear_53_cast_fp16 = linear(bias = var_1531_to_fp16, weight = var_1530_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")];
+            tensor<int32, [1]> var_1538_axes_0 = const()[name = string("op_1538_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189398720)))];
+            tensor<fp16, [768]> blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189400320)))];
+            tensor<fp16, [1, ?, 768]> var_1538_cast_fp16 = layer_norm(axes = var_1538_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1372_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1538_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_1547_to_fp16 = const()[name = string("op_1547_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189401920)))];
+            tensor<fp16, [3072]> var_1548_to_fp16 = const()[name = string("op_1548_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194120576)))];
+            tensor<fp16, [1, ?, 3072]> linear_54_cast_fp16 = linear(bias = var_1548_to_fp16, weight = var_1547_to_fp16, x = var_1538_cast_fp16)[name = string("linear_54_cast_fp16")];
+            string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_1553_to_fp16 = const()[name = string("op_1553_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194126784)))];
+            tensor<fp16, [768]> var_1554_to_fp16 = const()[name = string("op_1554_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198845440)))];
+            tensor<fp16, [1, ?, 768]> linear_55_cast_fp16 = linear(bias = var_1554_to_fp16, weight = var_1553_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")];
+            tensor<int32, [4]> k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor<int32, [4]>([8, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_36)[name = string("k_cache_29_cast_fp16")];
+            tensor<int32, [4]> v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor<int32, [4]>([8, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_37)[name = string("v_cache_29_cast_fp16")];
+            tensor<int32, [4]> k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor<int32, [4]>([8, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")];
+            tensor<int32, [4]> v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor<int32, [4]>([8, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")];
+            int32 var_1577 = const()[name = string("op_1577"), val = int32(-1)];
+            tensor<int32, [1]> var_1595_axes_0 = const()[name = string("op_1595_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198847040)))];
+            tensor<fp16, [768]> blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198848640)))];
+            fp16 var_1583_to_fp16 = const()[name = string("op_1583_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_1595_cast_fp16 = layer_norm(axes = var_1595_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1583_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1595_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1606_to_fp16 = const()[name = string("op_1606_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198850240)))];
+            tensor<fp16, [768]> var_1607_to_fp16 = const()[name = string("op_1607_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200029952)))];
+            tensor<fp16, [1, ?, 768]> linear_56_cast_fp16 = linear(bias = var_1607_to_fp16, weight = var_1606_to_fp16, x = var_1595_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200031552)))];
+            tensor<fp16, [1, ?, 768]> linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1610_to_fp16, x = var_1595_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1614_to_fp16 = const()[name = string("op_1614_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201211264)))];
+            tensor<fp16, [768]> var_1615_to_fp16 = const()[name = string("op_1615_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202390976)))];
+            tensor<fp16, [1, ?, 768]> linear_58_cast_fp16 = linear(bias = var_1615_to_fp16, weight = var_1614_to_fp16, x = var_1595_cast_fp16)[name = string("linear_58_cast_fp16")];
+            tensor<int32, [3]> var_1617_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1617_shape_cast_fp16")];
+            int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)];
+            int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)];
+            bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)];
+            string var_1617_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1617_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1617_shape_cast_fp16_to_uint16 = cast(dtype = var_1617_shape_cast_fp16_to_uint16_dtype_0, x = var_1617_shape_cast_fp16)[name = string("cast_136")];
+            uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1617_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")];
+            string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_135")];
+            int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")];
+            tensor<int32, [1]> expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")];
+            tensor<int32, [1]> concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor<int32, [1]>([7])];
+            int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)];
+            bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")];
+            tensor<int32, [1]> concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)];
+            bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_36)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_38_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_38 = read_state(input = k_cache1)[name = string("coreml_update_state_38")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_37)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_39_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_39 = read_state(input = v_cache1)[name = string("coreml_update_state_39")];
+            int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)];
+            int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(768)];
+            int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)];
+            bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")];
+            tensor<int32, [3]> var_1633_begin_0 = const()[name = string("op_1633_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1633_end_mask_0 = const()[name = string("op_1633_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1633_cast_fp16 = slice_by_index(begin = var_1633_begin_0, end = concat_164, end_mask = var_1633_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1633_cast_fp16")];
+            tensor<int32, [3]> var_1636_begin_0 = const()[name = string("op_1636_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1636_end_mask_0 = const()[name = string("op_1636_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1636_cast_fp16 = slice_by_index(begin = var_1636_begin_0, end = concat_164, end_mask = var_1636_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1636_cast_fp16")];
+            tensor<int32, [4]> concat_166x = const()[name = string("concat_166x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1646_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1646_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_59_cast_fp16 = mul(x = var_1646_cast_fp16, y = const_88_to_fp16)[name = string("q_59_cast_fp16")];
+            tensor<int32, [4]> concat_167x = const()[name = string("concat_167x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1653_cast_fp16 = reshape(shape = concat_167x, x = var_1633_cast_fp16)[name = string("op_1653_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_89_to_fp16 = const()[name = string("const_89_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_75_cast_fp16 = mul(x = var_1653_cast_fp16, y = const_89_to_fp16)[name = string("k_75_cast_fp16")];
+            tensor<int32, [4]> concat_168x = const()[name = string("concat_168x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1660_cast_fp16 = reshape(shape = concat_168x, x = var_1636_cast_fp16)[name = string("op_1660_cast_fp16")];
+            tensor<int32, [4]> var_1661 = const()[name = string("op_1661"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)];
+            bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_125_perm_0 = const()[name = string("transpose_125_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_126_perm_0 = const()[name = string("transpose_126_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_126 = transpose(perm = transpose_126_perm_0, x = k_75_cast_fp16)[name = string("transpose_182")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_125 = transpose(perm = transpose_125_perm_0, x = q_59_cast_fp16)[name = string("transpose_183")];
+            tensor<fp16, [1, 12, ?, ?]> qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_125, y = transpose_126)[name = string("qk_43_cast_fp16")];
+            int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)];
+            int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)];
+            bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")];
+            tensor<int32, [2]> var_1664_begin_0 = const()[name = string("op_1664_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1664_end_mask_0 = const()[name = string("op_1664_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1664_cast_fp16 = slice_by_index(begin = var_1664_begin_0, end = concat_169, end_mask = var_1664_end_mask_0, x = mask_to_fp16)[name = string("op_1664_cast_fp16")];
+            int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)];
+            int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)];
+            bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")];
+            tensor<int32, [2]> var_1665_begin_0 = const()[name = string("op_1665_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1665_end_mask_0 = const()[name = string("op_1665_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1665_cast_fp16 = slice_by_index(begin = var_1665_begin_0, end = concat_170, end_mask = var_1665_end_mask_0, x = var_1664_cast_fp16)[name = string("op_1665_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1665_cast_fp16)[name = string("qk_45_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_1668_cast_fp16 = softmax(axis = var_1577, x = qk_45_cast_fp16)[name = string("op_1668_cast_fp16")];
+            bool var_1670_transpose_x_0 = const()[name = string("op_1670_transpose_x_0"), val = bool(false)];
+            bool var_1670_transpose_y_0 = const()[name = string("op_1670_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_75_cast_fp16 = transpose(perm = var_1661, x = var_1660_cast_fp16)[name = string("transpose_184")];
+            tensor<fp16, [1, 12, ?, 64]> var_1670_cast_fp16 = matmul(transpose_x = var_1670_transpose_x_0, transpose_y = var_1670_transpose_y_0, x = var_1668_cast_fp16, y = v_75_cast_fp16)[name = string("op_1670_cast_fp16")];
+            tensor<int32, [4]> var_1671 = const()[name = string("op_1671"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_171x = const()[name = string("concat_171x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1672_cast_fp16 = transpose(perm = var_1671, x = var_1670_cast_fp16)[name = string("transpose_181")];
+            tensor<fp16, [1, ?, 768]> x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1672_cast_fp16)[name = string("x_133_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1676_to_fp16 = const()[name = string("op_1676_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202392576)))];
+            tensor<fp16, [768]> var_1677_to_fp16 = const()[name = string("op_1677_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203572288)))];
+            tensor<fp16, [1, ?, 768]> linear_59_cast_fp16 = linear(bias = var_1677_to_fp16, weight = var_1676_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")];
+            tensor<int32, [1]> var_1684_axes_0 = const()[name = string("op_1684_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203573888)))];
+            tensor<fp16, [768]> blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203575488)))];
+            tensor<fp16, [1, ?, 768]> var_1684_cast_fp16 = layer_norm(axes = var_1684_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1583_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1684_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1693_to_fp16 = const()[name = string("op_1693_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203577088)))];
+            tensor<fp16, [768]> var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204756800)))];
+            tensor<fp16, [1, ?, 768]> linear_60_cast_fp16 = linear(bias = var_1694_to_fp16, weight = var_1693_to_fp16, x = var_1684_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<int32, [3]> concat_172 = const()[name = string("concat_172"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_173 = const()[name = string("concat_173"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_174 = const()[name = string("concat_174"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_175 = const()[name = string("concat_175"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_176x = const()[name = string("concat_176x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1714_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1714_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_63_cast_fp16 = mul(x = var_1714_cast_fp16, y = const_90_to_fp16)[name = string("q_63_cast_fp16")];
+            tensor<int32, [4]> var_1720 = const()[name = string("op_1720"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1721_cast_fp16 = reshape(shape = var_1720, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1721_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_91_to_fp16 = const()[name = string("const_91_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_79_cast_fp16 = mul(x = var_1721_cast_fp16, y = const_91_to_fp16)[name = string("k_79_cast_fp16")];
+            tensor<int32, [4]> var_1727 = const()[name = string("op_1727"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1728_cast_fp16 = reshape(shape = var_1727, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1728_cast_fp16")];
+            tensor<int32, [4]> var_1729 = const()[name = string("op_1729"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)];
+            bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_127_perm_0 = const()[name = string("transpose_127_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_128_perm_0 = const()[name = string("transpose_128_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_128 = transpose(perm = transpose_128_perm_0, x = k_79_cast_fp16)[name = string("transpose_178")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_127 = transpose(perm = transpose_127_perm_0, x = q_63_cast_fp16)[name = string("transpose_179")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_127, y = transpose_128)[name = string("qk_47_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_1733_cast_fp16 = softmax(axis = var_1577, x = qk_47_cast_fp16)[name = string("op_1733_cast_fp16")];
+            bool var_1735_transpose_x_0 = const()[name = string("op_1735_transpose_x_0"), val = bool(false)];
+            bool var_1735_transpose_y_0 = const()[name = string("op_1735_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_79_cast_fp16 = transpose(perm = var_1729, x = var_1728_cast_fp16)[name = string("transpose_180")];
+            tensor<fp16, [1, 12, ?, 64]> var_1735_cast_fp16 = matmul(transpose_x = var_1735_transpose_x_0, transpose_y = var_1735_transpose_y_0, x = var_1733_cast_fp16, y = v_79_cast_fp16)[name = string("op_1735_cast_fp16")];
+            tensor<int32, [4]> var_1736 = const()[name = string("op_1736"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_177x = const()[name = string("concat_177x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1737_cast_fp16 = transpose(perm = var_1736, x = var_1735_cast_fp16)[name = string("transpose_177")];
+            tensor<fp16, [1, ?, 768]> x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1737_cast_fp16)[name = string("x_139_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204758400)))];
+            tensor<fp16, [768]> var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205938112)))];
+            tensor<fp16, [1, ?, 768]> linear_61_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")];
+            tensor<int32, [1]> var_1749_axes_0 = const()[name = string("op_1749_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205939712)))];
+            tensor<fp16, [768]> blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205941312)))];
+            tensor<fp16, [1, ?, 768]> var_1749_cast_fp16 = layer_norm(axes = var_1749_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1583_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1749_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_1758_to_fp16 = const()[name = string("op_1758_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205942912)))];
+            tensor<fp16, [3072]> var_1759_to_fp16 = const()[name = string("op_1759_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210661568)))];
+            tensor<fp16, [1, ?, 3072]> linear_62_cast_fp16 = linear(bias = var_1759_to_fp16, weight = var_1758_to_fp16, x = var_1749_cast_fp16)[name = string("linear_62_cast_fp16")];
+            string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_1764_to_fp16 = const()[name = string("op_1764_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210667776)))];
+            tensor<fp16, [768]> var_1765_to_fp16 = const()[name = string("op_1765_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215386432)))];
+            tensor<fp16, [1, ?, 768]> linear_63_cast_fp16 = linear(bias = var_1765_to_fp16, weight = var_1764_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")];
+            tensor<int32, [4]> k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor<int32, [4]>([9, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_38)[name = string("k_cache_33_cast_fp16")];
+            tensor<int32, [4]> v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor<int32, [4]>([9, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_39)[name = string("v_cache_33_cast_fp16")];
+            tensor<int32, [4]> k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor<int32, [4]>([9, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")];
+            tensor<int32, [4]> v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor<int32, [4]>([9, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")];
+            int32 var_1788 = const()[name = string("op_1788"), val = int32(-1)];
+            tensor<int32, [1]> var_1806_axes_0 = const()[name = string("op_1806_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215388032)))];
+            tensor<fp16, [768]> blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215389632)))];
+            fp16 var_1794_to_fp16 = const()[name = string("op_1794_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_1806_cast_fp16 = layer_norm(axes = var_1806_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1794_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1806_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1817_to_fp16 = const()[name = string("op_1817_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215391232)))];
+            tensor<fp16, [768]> var_1818_to_fp16 = const()[name = string("op_1818_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216570944)))];
+            tensor<fp16, [1, ?, 768]> linear_64_cast_fp16 = linear(bias = var_1818_to_fp16, weight = var_1817_to_fp16, x = var_1806_cast_fp16)[name = string("linear_64_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1821_to_fp16 = const()[name = string("op_1821_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216572544)))];
+            tensor<fp16, [1, ?, 768]> linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1821_to_fp16, x = var_1806_cast_fp16)[name = string("linear_65_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1825_to_fp16 = const()[name = string("op_1825_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217752256)))];
+            tensor<fp16, [768]> var_1826_to_fp16 = const()[name = string("op_1826_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218931968)))];
+            tensor<fp16, [1, ?, 768]> linear_66_cast_fp16 = linear(bias = var_1826_to_fp16, weight = var_1825_to_fp16, x = var_1806_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<int32, [3]> var_1828_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1828_shape_cast_fp16")];
+            int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)];
+            int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)];
+            bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)];
+            string var_1828_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1828_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_1828_shape_cast_fp16_to_uint16 = cast(dtype = var_1828_shape_cast_fp16_to_uint16_dtype_0, x = var_1828_shape_cast_fp16)[name = string("cast_134")];
+            uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1828_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")];
+            string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_133")];
+            int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")];
+            tensor<int32, [1]> expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")];
+            tensor<int32, [1]> concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor<int32, [1]>([8])];
+            int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)];
+            bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")];
+            tensor<int32, [1]> concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)];
+            bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_38)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_40_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_40 = read_state(input = k_cache1)[name = string("coreml_update_state_40")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_39)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_41_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_41 = read_state(input = v_cache1)[name = string("coreml_update_state_41")];
+            int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)];
+            int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(768)];
+            int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)];
+            bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")];
+            tensor<int32, [3]> var_1844_begin_0 = const()[name = string("op_1844_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1844_end_mask_0 = const()[name = string("op_1844_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1844_cast_fp16 = slice_by_index(begin = var_1844_begin_0, end = concat_186, end_mask = var_1844_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1844_cast_fp16")];
+            tensor<int32, [3]> var_1847_begin_0 = const()[name = string("op_1847_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_1847_end_mask_0 = const()[name = string("op_1847_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_1847_cast_fp16 = slice_by_index(begin = var_1847_begin_0, end = concat_186, end_mask = var_1847_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1847_cast_fp16")];
+            tensor<int32, [4]> concat_188x = const()[name = string("concat_188x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1857_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1857_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_92_to_fp16 = const()[name = string("const_92_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_67_cast_fp16 = mul(x = var_1857_cast_fp16, y = const_92_to_fp16)[name = string("q_67_cast_fp16")];
+            tensor<int32, [4]> concat_189x = const()[name = string("concat_189x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1864_cast_fp16 = reshape(shape = concat_189x, x = var_1844_cast_fp16)[name = string("op_1864_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_93_to_fp16 = const()[name = string("const_93_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_85_cast_fp16 = mul(x = var_1864_cast_fp16, y = const_93_to_fp16)[name = string("k_85_cast_fp16")];
+            tensor<int32, [4]> concat_190x = const()[name = string("concat_190x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1871_cast_fp16 = reshape(shape = concat_190x, x = var_1847_cast_fp16)[name = string("op_1871_cast_fp16")];
+            tensor<int32, [4]> var_1872 = const()[name = string("op_1872"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)];
+            bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_129_perm_0 = const()[name = string("transpose_129_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_130_perm_0 = const()[name = string("transpose_130_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_130 = transpose(perm = transpose_130_perm_0, x = k_85_cast_fp16)[name = string("transpose_174")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_129 = transpose(perm = transpose_129_perm_0, x = q_67_cast_fp16)[name = string("transpose_175")];
+            tensor<fp16, [1, 12, ?, ?]> qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_129, y = transpose_130)[name = string("qk_49_cast_fp16")];
+            int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)];
+            int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)];
+            bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")];
+            tensor<int32, [2]> var_1875_begin_0 = const()[name = string("op_1875_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1875_end_mask_0 = const()[name = string("op_1875_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_1875_cast_fp16 = slice_by_index(begin = var_1875_begin_0, end = concat_191, end_mask = var_1875_end_mask_0, x = mask_to_fp16)[name = string("op_1875_cast_fp16")];
+            int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)];
+            int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)];
+            bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")];
+            tensor<int32, [2]> var_1876_begin_0 = const()[name = string("op_1876_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_1876_end_mask_0 = const()[name = string("op_1876_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_1876_cast_fp16 = slice_by_index(begin = var_1876_begin_0, end = concat_192, end_mask = var_1876_end_mask_0, x = var_1875_cast_fp16)[name = string("op_1876_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1876_cast_fp16)[name = string("qk_51_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_1879_cast_fp16 = softmax(axis = var_1788, x = qk_51_cast_fp16)[name = string("op_1879_cast_fp16")];
+            bool var_1881_transpose_x_0 = const()[name = string("op_1881_transpose_x_0"), val = bool(false)];
+            bool var_1881_transpose_y_0 = const()[name = string("op_1881_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_85_cast_fp16 = transpose(perm = var_1872, x = var_1871_cast_fp16)[name = string("transpose_176")];
+            tensor<fp16, [1, 12, ?, 64]> var_1881_cast_fp16 = matmul(transpose_x = var_1881_transpose_x_0, transpose_y = var_1881_transpose_y_0, x = var_1879_cast_fp16, y = v_85_cast_fp16)[name = string("op_1881_cast_fp16")];
+            tensor<int32, [4]> var_1882 = const()[name = string("op_1882"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_193x = const()[name = string("concat_193x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1883_cast_fp16 = transpose(perm = var_1882, x = var_1881_cast_fp16)[name = string("transpose_173")];
+            tensor<fp16, [1, ?, 768]> x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1883_cast_fp16)[name = string("x_151_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1887_to_fp16 = const()[name = string("op_1887_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218933568)))];
+            tensor<fp16, [768]> var_1888_to_fp16 = const()[name = string("op_1888_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220113280)))];
+            tensor<fp16, [1, ?, 768]> linear_67_cast_fp16 = linear(bias = var_1888_to_fp16, weight = var_1887_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")];
+            tensor<int32, [1]> var_1895_axes_0 = const()[name = string("op_1895_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220114880)))];
+            tensor<fp16, [768]> blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220116480)))];
+            tensor<fp16, [1, ?, 768]> var_1895_cast_fp16 = layer_norm(axes = var_1895_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1794_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1895_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1904_to_fp16 = const()[name = string("op_1904_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220118080)))];
+            tensor<fp16, [768]> var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221297792)))];
+            tensor<fp16, [1, ?, 768]> linear_68_cast_fp16 = linear(bias = var_1905_to_fp16, weight = var_1904_to_fp16, x = var_1895_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [3]> concat_194 = const()[name = string("concat_194"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_195 = const()[name = string("concat_195"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_196 = const()[name = string("concat_196"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_197 = const()[name = string("concat_197"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_198x = const()[name = string("concat_198x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_1925_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_1925_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_94_to_fp16 = const()[name = string("const_94_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_71_cast_fp16 = mul(x = var_1925_cast_fp16, y = const_94_to_fp16)[name = string("q_71_cast_fp16")];
+            tensor<int32, [4]> var_1931 = const()[name = string("op_1931"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1932_cast_fp16 = reshape(shape = var_1931, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1932_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_95_to_fp16 = const()[name = string("const_95_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_89_cast_fp16 = mul(x = var_1932_cast_fp16, y = const_95_to_fp16)[name = string("k_89_cast_fp16")];
+            tensor<int32, [4]> var_1938 = const()[name = string("op_1938"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1939_cast_fp16 = reshape(shape = var_1938, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1939_cast_fp16")];
+            tensor<int32, [4]> var_1940 = const()[name = string("op_1940"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)];
+            bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_131_perm_0 = const()[name = string("transpose_131_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_132_perm_0 = const()[name = string("transpose_132_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_132 = transpose(perm = transpose_132_perm_0, x = k_89_cast_fp16)[name = string("transpose_170")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_131 = transpose(perm = transpose_131_perm_0, x = q_71_cast_fp16)[name = string("transpose_171")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_131, y = transpose_132)[name = string("qk_53_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_1944_cast_fp16 = softmax(axis = var_1788, x = qk_53_cast_fp16)[name = string("op_1944_cast_fp16")];
+            bool var_1946_transpose_x_0 = const()[name = string("op_1946_transpose_x_0"), val = bool(false)];
+            bool var_1946_transpose_y_0 = const()[name = string("op_1946_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_89_cast_fp16 = transpose(perm = var_1940, x = var_1939_cast_fp16)[name = string("transpose_172")];
+            tensor<fp16, [1, 12, ?, 64]> var_1946_cast_fp16 = matmul(transpose_x = var_1946_transpose_x_0, transpose_y = var_1946_transpose_y_0, x = var_1944_cast_fp16, y = v_89_cast_fp16)[name = string("op_1946_cast_fp16")];
+            tensor<int32, [4]> var_1947 = const()[name = string("op_1947"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_199x = const()[name = string("concat_199x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_1948_cast_fp16 = transpose(perm = var_1947, x = var_1946_cast_fp16)[name = string("transpose_169")];
+            tensor<fp16, [1, ?, 768]> x_157_cast_fp16 = reshape(shape = concat_199x, x = var_1948_cast_fp16)[name = string("x_157_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1952_to_fp16 = const()[name = string("op_1952_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221299392)))];
+            tensor<fp16, [768]> var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222479104)))];
+            tensor<fp16, [1, ?, 768]> linear_69_cast_fp16 = linear(bias = var_1953_to_fp16, weight = var_1952_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")];
+            tensor<int32, [1]> var_1960_axes_0 = const()[name = string("op_1960_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222480704)))];
+            tensor<fp16, [768]> blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222482304)))];
+            tensor<fp16, [1, ?, 768]> var_1960_cast_fp16 = layer_norm(axes = var_1960_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1794_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_1960_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_1969_to_fp16 = const()[name = string("op_1969_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222483904)))];
+            tensor<fp16, [3072]> var_1970_to_fp16 = const()[name = string("op_1970_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227202560)))];
+            tensor<fp16, [1, ?, 3072]> linear_70_cast_fp16 = linear(bias = var_1970_to_fp16, weight = var_1969_to_fp16, x = var_1960_cast_fp16)[name = string("linear_70_cast_fp16")];
+            string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_1975_to_fp16 = const()[name = string("op_1975_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227208768)))];
+            tensor<fp16, [768]> var_1976_to_fp16 = const()[name = string("op_1976_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231927424)))];
+            tensor<fp16, [1, ?, 768]> linear_71_cast_fp16 = linear(bias = var_1976_to_fp16, weight = var_1975_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")];
+            tensor<int32, [4]> k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor<int32, [4]>([10, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_40)[name = string("k_cache_37_cast_fp16")];
+            tensor<int32, [4]> v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor<int32, [4]>([10, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_41)[name = string("v_cache_37_cast_fp16")];
+            tensor<int32, [4]> k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor<int32, [4]>([10, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")];
+            tensor<int32, [4]> v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor<int32, [4]>([10, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")];
+            int32 var_1999 = const()[name = string("op_1999"), val = int32(-1)];
+            tensor<int32, [1]> var_2017_axes_0 = const()[name = string("op_2017_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231929024)))];
+            tensor<fp16, [768]> blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231930624)))];
+            fp16 var_2005_to_fp16 = const()[name = string("op_2005_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_2017_cast_fp16 = layer_norm(axes = var_2017_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2005_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2017_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2028_to_fp16 = const()[name = string("op_2028_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231932224)))];
+            tensor<fp16, [768]> var_2029_to_fp16 = const()[name = string("op_2029_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233111936)))];
+            tensor<fp16, [1, ?, 768]> linear_72_cast_fp16 = linear(bias = var_2029_to_fp16, weight = var_2028_to_fp16, x = var_2017_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233113536)))];
+            tensor<fp16, [1, ?, 768]> linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2032_to_fp16, x = var_2017_cast_fp16)[name = string("linear_73_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2036_to_fp16 = const()[name = string("op_2036_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234293248)))];
+            tensor<fp16, [768]> var_2037_to_fp16 = const()[name = string("op_2037_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235472960)))];
+            tensor<fp16, [1, ?, 768]> linear_74_cast_fp16 = linear(bias = var_2037_to_fp16, weight = var_2036_to_fp16, x = var_2017_cast_fp16)[name = string("linear_74_cast_fp16")];
+            tensor<int32, [3]> var_2039_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2039_shape_cast_fp16")];
+            int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)];
+            int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)];
+            bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)];
+            string var_2039_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2039_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2039_shape_cast_fp16_to_uint16 = cast(dtype = var_2039_shape_cast_fp16_to_uint16_dtype_0, x = var_2039_shape_cast_fp16)[name = string("cast_132")];
+            uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2039_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")];
+            string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_131")];
+            int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")];
+            tensor<int32, [1]> expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")];
+            tensor<int32, [1]> concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor<int32, [1]>([9])];
+            int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)];
+            bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")];
+            tensor<int32, [1]> concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)];
+            bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_40)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_42_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_42 = read_state(input = k_cache1)[name = string("coreml_update_state_42")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_41)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_43_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_43 = read_state(input = v_cache1)[name = string("coreml_update_state_43")];
+            int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)];
+            int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(768)];
+            int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)];
+            bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")];
+            tensor<int32, [3]> var_2055_begin_0 = const()[name = string("op_2055_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2055_end_mask_0 = const()[name = string("op_2055_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_2055_cast_fp16 = slice_by_index(begin = var_2055_begin_0, end = concat_208, end_mask = var_2055_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2055_cast_fp16")];
+            tensor<int32, [3]> var_2058_begin_0 = const()[name = string("op_2058_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2058_end_mask_0 = const()[name = string("op_2058_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = concat_208, end_mask = var_2058_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2058_cast_fp16")];
+            tensor<int32, [4]> concat_210x = const()[name = string("concat_210x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2068_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2068_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_96_to_fp16 = const()[name = string("const_96_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_75_cast_fp16 = mul(x = var_2068_cast_fp16, y = const_96_to_fp16)[name = string("q_75_cast_fp16")];
+            tensor<int32, [4]> concat_211x = const()[name = string("concat_211x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2075_cast_fp16 = reshape(shape = concat_211x, x = var_2055_cast_fp16)[name = string("op_2075_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_97_to_fp16 = const()[name = string("const_97_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_95_cast_fp16 = mul(x = var_2075_cast_fp16, y = const_97_to_fp16)[name = string("k_95_cast_fp16")];
+            tensor<int32, [4]> concat_212x = const()[name = string("concat_212x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2082_cast_fp16 = reshape(shape = concat_212x, x = var_2058_cast_fp16)[name = string("op_2082_cast_fp16")];
+            tensor<int32, [4]> var_2083 = const()[name = string("op_2083"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)];
+            bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_133_perm_0 = const()[name = string("transpose_133_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_134_perm_0 = const()[name = string("transpose_134_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_134 = transpose(perm = transpose_134_perm_0, x = k_95_cast_fp16)[name = string("transpose_166")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_133 = transpose(perm = transpose_133_perm_0, x = q_75_cast_fp16)[name = string("transpose_167")];
+            tensor<fp16, [1, 12, ?, ?]> qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_133, y = transpose_134)[name = string("qk_55_cast_fp16")];
+            int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)];
+            int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)];
+            bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")];
+            tensor<int32, [2]> var_2086_begin_0 = const()[name = string("op_2086_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2086_end_mask_0 = const()[name = string("op_2086_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = concat_213, end_mask = var_2086_end_mask_0, x = mask_to_fp16)[name = string("op_2086_cast_fp16")];
+            int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)];
+            int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)];
+            bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")];
+            tensor<int32, [2]> var_2087_begin_0 = const()[name = string("op_2087_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2087_end_mask_0 = const()[name = string("op_2087_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2087_cast_fp16 = slice_by_index(begin = var_2087_begin_0, end = concat_214, end_mask = var_2087_end_mask_0, x = var_2086_cast_fp16)[name = string("op_2087_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2087_cast_fp16)[name = string("qk_57_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_2090_cast_fp16 = softmax(axis = var_1999, x = qk_57_cast_fp16)[name = string("op_2090_cast_fp16")];
+            bool var_2092_transpose_x_0 = const()[name = string("op_2092_transpose_x_0"), val = bool(false)];
+            bool var_2092_transpose_y_0 = const()[name = string("op_2092_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_95_cast_fp16 = transpose(perm = var_2083, x = var_2082_cast_fp16)[name = string("transpose_168")];
+            tensor<fp16, [1, 12, ?, 64]> var_2092_cast_fp16 = matmul(transpose_x = var_2092_transpose_x_0, transpose_y = var_2092_transpose_y_0, x = var_2090_cast_fp16, y = v_95_cast_fp16)[name = string("op_2092_cast_fp16")];
+            tensor<int32, [4]> var_2093 = const()[name = string("op_2093"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_215x = const()[name = string("concat_215x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_2094_cast_fp16 = transpose(perm = var_2093, x = var_2092_cast_fp16)[name = string("transpose_165")];
+            tensor<fp16, [1, ?, 768]> x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2094_cast_fp16)[name = string("x_169_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2098_to_fp16 = const()[name = string("op_2098_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235474560)))];
+            tensor<fp16, [768]> var_2099_to_fp16 = const()[name = string("op_2099_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236654272)))];
+            tensor<fp16, [1, ?, 768]> linear_75_cast_fp16 = linear(bias = var_2099_to_fp16, weight = var_2098_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")];
+            tensor<int32, [1]> var_2106_axes_0 = const()[name = string("op_2106_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236655872)))];
+            tensor<fp16, [768]> blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236657472)))];
+            tensor<fp16, [1, ?, 768]> var_2106_cast_fp16 = layer_norm(axes = var_2106_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2005_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2106_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2115_to_fp16 = const()[name = string("op_2115_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236659072)))];
+            tensor<fp16, [768]> var_2116_to_fp16 = const()[name = string("op_2116_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237838784)))];
+            tensor<fp16, [1, ?, 768]> linear_76_cast_fp16 = linear(bias = var_2116_to_fp16, weight = var_2115_to_fp16, x = var_2106_cast_fp16)[name = string("linear_76_cast_fp16")];
+            tensor<int32, [3]> concat_216 = const()[name = string("concat_216"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_217 = const()[name = string("concat_217"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_218 = const()[name = string("concat_218"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_219 = const()[name = string("concat_219"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_220x = const()[name = string("concat_220x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2136_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2136_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_98_to_fp16 = const()[name = string("const_98_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_79_cast_fp16 = mul(x = var_2136_cast_fp16, y = const_98_to_fp16)[name = string("q_79_cast_fp16")];
+            tensor<int32, [4]> var_2142 = const()[name = string("op_2142"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_2143_cast_fp16 = reshape(shape = var_2142, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2143_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_99_cast_fp16 = mul(x = var_2143_cast_fp16, y = const_99_to_fp16)[name = string("k_99_cast_fp16")];
+            tensor<int32, [4]> var_2149 = const()[name = string("op_2149"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_2150_cast_fp16 = reshape(shape = var_2149, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2150_cast_fp16")];
+            tensor<int32, [4]> var_2151 = const()[name = string("op_2151"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)];
+            bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_135_perm_0 = const()[name = string("transpose_135_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_136_perm_0 = const()[name = string("transpose_136_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_136 = transpose(perm = transpose_136_perm_0, x = k_99_cast_fp16)[name = string("transpose_162")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_135 = transpose(perm = transpose_135_perm_0, x = q_79_cast_fp16)[name = string("transpose_163")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_135, y = transpose_136)[name = string("qk_59_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_2155_cast_fp16 = softmax(axis = var_1999, x = qk_59_cast_fp16)[name = string("op_2155_cast_fp16")];
+            bool var_2157_transpose_x_0 = const()[name = string("op_2157_transpose_x_0"), val = bool(false)];
+            bool var_2157_transpose_y_0 = const()[name = string("op_2157_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_99_cast_fp16 = transpose(perm = var_2151, x = var_2150_cast_fp16)[name = string("transpose_164")];
+            tensor<fp16, [1, 12, ?, 64]> var_2157_cast_fp16 = matmul(transpose_x = var_2157_transpose_x_0, transpose_y = var_2157_transpose_y_0, x = var_2155_cast_fp16, y = v_99_cast_fp16)[name = string("op_2157_cast_fp16")];
+            tensor<int32, [4]> var_2158 = const()[name = string("op_2158"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_221x = const()[name = string("concat_221x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_2159_cast_fp16 = transpose(perm = var_2158, x = var_2157_cast_fp16)[name = string("transpose_161")];
+            tensor<fp16, [1, ?, 768]> x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2159_cast_fp16)[name = string("x_175_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2163_to_fp16 = const()[name = string("op_2163_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237840384)))];
+            tensor<fp16, [768]> var_2164_to_fp16 = const()[name = string("op_2164_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239020096)))];
+            tensor<fp16, [1, ?, 768]> linear_77_cast_fp16 = linear(bias = var_2164_to_fp16, weight = var_2163_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")];
+            tensor<int32, [1]> var_2171_axes_0 = const()[name = string("op_2171_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239021696)))];
+            tensor<fp16, [768]> blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239023296)))];
+            tensor<fp16, [1, ?, 768]> var_2171_cast_fp16 = layer_norm(axes = var_2171_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2005_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2171_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_2180_to_fp16 = const()[name = string("op_2180_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239024896)))];
+            tensor<fp16, [3072]> var_2181_to_fp16 = const()[name = string("op_2181_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243743552)))];
+            tensor<fp16, [1, ?, 3072]> linear_78_cast_fp16 = linear(bias = var_2181_to_fp16, weight = var_2180_to_fp16, x = var_2171_cast_fp16)[name = string("linear_78_cast_fp16")];
+            string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_2186_to_fp16 = const()[name = string("op_2186_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243749760)))];
+            tensor<fp16, [768]> var_2187_to_fp16 = const()[name = string("op_2187_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248468416)))];
+            tensor<fp16, [1, ?, 768]> linear_79_cast_fp16 = linear(bias = var_2187_to_fp16, weight = var_2186_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")];
+            tensor<int32, [4]> k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor<int32, [4]>([11, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_42)[name = string("k_cache_41_cast_fp16")];
+            tensor<int32, [4]> v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor<int32, [4]>([11, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_43)[name = string("v_cache_41_cast_fp16")];
+            tensor<int32, [4]> k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor<int32, [4]>([11, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")];
+            tensor<int32, [4]> v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor<int32, [4]>([11, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")];
+            int32 var_2210 = const()[name = string("op_2210"), val = int32(-1)];
+            tensor<int32, [1]> var_2228_axes_0 = const()[name = string("op_2228_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248470016)))];
+            tensor<fp16, [768]> blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248471616)))];
+            fp16 var_2216_to_fp16 = const()[name = string("op_2216_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_2228_cast_fp16 = layer_norm(axes = var_2228_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2216_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2228_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2239_to_fp16 = const()[name = string("op_2239_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248473216)))];
+            tensor<fp16, [768]> var_2240_to_fp16 = const()[name = string("op_2240_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249652928)))];
+            tensor<fp16, [1, ?, 768]> linear_80_cast_fp16 = linear(bias = var_2240_to_fp16, weight = var_2239_to_fp16, x = var_2228_cast_fp16)[name = string("linear_80_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2243_to_fp16 = const()[name = string("op_2243_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249654528)))];
+            tensor<fp16, [1, ?, 768]> linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2243_to_fp16, x = var_2228_cast_fp16)[name = string("linear_81_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2247_to_fp16 = const()[name = string("op_2247_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250834240)))];
+            tensor<fp16, [768]> var_2248_to_fp16 = const()[name = string("op_2248_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252013952)))];
+            tensor<fp16, [1, ?, 768]> linear_82_cast_fp16 = linear(bias = var_2248_to_fp16, weight = var_2247_to_fp16, x = var_2228_cast_fp16)[name = string("linear_82_cast_fp16")];
+            tensor<int32, [3]> var_2250_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2250_shape_cast_fp16")];
+            int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)];
+            int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)];
+            bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)];
+            string var_2250_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2250_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2250_shape_cast_fp16_to_uint16 = cast(dtype = var_2250_shape_cast_fp16_to_uint16_dtype_0, x = var_2250_shape_cast_fp16)[name = string("cast_130")];
+            uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2250_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")];
+            string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_129")];
+            int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")];
+            tensor<int32, [1]> expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")];
+            tensor<int32, [1]> concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor<int32, [1]>([10])];
+            int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)];
+            bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")];
+            tensor<int32, [1]> concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)];
+            bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_42)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_44_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_44 = read_state(input = k_cache1)[name = string("coreml_update_state_44")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_43)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_45_write_state")];
+            tensor<fp16, [12, 1, 448, 768]> coreml_update_state_45 = read_state(input = v_cache1)[name = string("coreml_update_state_45")];
+            int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)];
+            int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(768)];
+            int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)];
+            bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")];
+            tensor<int32, [3]> var_2266_begin_0 = const()[name = string("op_2266_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2266_end_mask_0 = const()[name = string("op_2266_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_2266_cast_fp16 = slice_by_index(begin = var_2266_begin_0, end = concat_230, end_mask = var_2266_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2266_cast_fp16")];
+            tensor<int32, [3]> var_2269_begin_0 = const()[name = string("op_2269_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2269_end_mask_0 = const()[name = string("op_2269_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_2269_cast_fp16 = slice_by_index(begin = var_2269_begin_0, end = concat_230, end_mask = var_2269_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2269_cast_fp16")];
+            tensor<int32, [4]> concat_232x = const()[name = string("concat_232x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2279_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2279_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_83_cast_fp16 = mul(x = var_2279_cast_fp16, y = const_100_to_fp16)[name = string("q_83_cast_fp16")];
+            tensor<int32, [4]> concat_233x = const()[name = string("concat_233x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2286_cast_fp16 = reshape(shape = concat_233x, x = var_2266_cast_fp16)[name = string("op_2286_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_105_cast_fp16 = mul(x = var_2286_cast_fp16, y = const_101_to_fp16)[name = string("k_105_cast_fp16")];
+            tensor<int32, [4]> concat_234x = const()[name = string("concat_234x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2293_cast_fp16 = reshape(shape = concat_234x, x = var_2269_cast_fp16)[name = string("op_2293_cast_fp16")];
+            tensor<int32, [4]> var_2294 = const()[name = string("op_2294"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)];
+            bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_137_perm_0 = const()[name = string("transpose_137_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_138_perm_0 = const()[name = string("transpose_138_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_138 = transpose(perm = transpose_138_perm_0, x = k_105_cast_fp16)[name = string("transpose_158")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_137 = transpose(perm = transpose_137_perm_0, x = q_83_cast_fp16)[name = string("transpose_159")];
+            tensor<fp16, [1, 12, ?, ?]> qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_137, y = transpose_138)[name = string("qk_61_cast_fp16")];
+            int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)];
+            int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)];
+            bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")];
+            tensor<int32, [2]> var_2297_begin_0 = const()[name = string("op_2297_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2297_end_mask_0 = const()[name = string("op_2297_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2297_cast_fp16 = slice_by_index(begin = var_2297_begin_0, end = concat_235, end_mask = var_2297_end_mask_0, x = mask_to_fp16)[name = string("op_2297_cast_fp16")];
+            int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)];
+            int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)];
+            bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")];
+            tensor<int32, [2]> var_2298_begin_0 = const()[name = string("op_2298_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2298_end_mask_0 = const()[name = string("op_2298_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = concat_236, end_mask = var_2298_end_mask_0, x = var_2297_cast_fp16)[name = string("op_2298_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2298_cast_fp16)[name = string("qk_63_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_2301_cast_fp16 = softmax(axis = var_2210, x = qk_63_cast_fp16)[name = string("op_2301_cast_fp16")];
+            bool var_2303_transpose_x_0 = const()[name = string("op_2303_transpose_x_0"), val = bool(false)];
+            bool var_2303_transpose_y_0 = const()[name = string("op_2303_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_105_cast_fp16 = transpose(perm = var_2294, x = var_2293_cast_fp16)[name = string("transpose_160")];
+            tensor<fp16, [1, 12, ?, 64]> var_2303_cast_fp16 = matmul(transpose_x = var_2303_transpose_x_0, transpose_y = var_2303_transpose_y_0, x = var_2301_cast_fp16, y = v_105_cast_fp16)[name = string("op_2303_cast_fp16")];
+            tensor<int32, [4]> var_2304 = const()[name = string("op_2304"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_237x = const()[name = string("concat_237x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_2305_cast_fp16 = transpose(perm = var_2304, x = var_2303_cast_fp16)[name = string("transpose_157")];
+            tensor<fp16, [1, ?, 768]> x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2305_cast_fp16)[name = string("x_187_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2309_to_fp16 = const()[name = string("op_2309_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252015552)))];
+            tensor<fp16, [768]> var_2310_to_fp16 = const()[name = string("op_2310_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253195264)))];
+            tensor<fp16, [1, ?, 768]> linear_83_cast_fp16 = linear(bias = var_2310_to_fp16, weight = var_2309_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")];
+            tensor<int32, [1]> var_2317_axes_0 = const()[name = string("op_2317_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253196864)))];
+            tensor<fp16, [768]> blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253198464)))];
+            tensor<fp16, [1, ?, 768]> var_2317_cast_fp16 = layer_norm(axes = var_2317_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2216_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2317_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2326_to_fp16 = const()[name = string("op_2326_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253200064)))];
+            tensor<fp16, [768]> var_2327_to_fp16 = const()[name = string("op_2327_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254379776)))];
+            tensor<fp16, [1, ?, 768]> linear_84_cast_fp16 = linear(bias = var_2327_to_fp16, weight = var_2326_to_fp16, x = var_2317_cast_fp16)[name = string("linear_84_cast_fp16")];
+            tensor<int32, [3]> concat_238 = const()[name = string("concat_238"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_239 = const()[name = string("concat_239"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_240 = const()[name = string("concat_240"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_241 = const()[name = string("concat_241"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_242x = const()[name = string("concat_242x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2347_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2347_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_102_to_fp16 = const()[name = string("const_102_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_87_cast_fp16 = mul(x = var_2347_cast_fp16, y = const_102_to_fp16)[name = string("q_87_cast_fp16")];
+            tensor<int32, [4]> var_2353 = const()[name = string("op_2353"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_2354_cast_fp16 = reshape(shape = var_2353, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2354_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_103_to_fp16 = const()[name = string("const_103_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_109_cast_fp16 = mul(x = var_2354_cast_fp16, y = const_103_to_fp16)[name = string("k_109_cast_fp16")];
+            tensor<int32, [4]> var_2360 = const()[name = string("op_2360"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_2361_cast_fp16 = reshape(shape = var_2360, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2361_cast_fp16")];
+            tensor<int32, [4]> var_2362 = const()[name = string("op_2362"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)];
+            bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_139_perm_0 = const()[name = string("transpose_139_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_140_perm_0 = const()[name = string("transpose_140_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_140 = transpose(perm = transpose_140_perm_0, x = k_109_cast_fp16)[name = string("transpose_154")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_139 = transpose(perm = transpose_139_perm_0, x = q_87_cast_fp16)[name = string("transpose_155")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_139, y = transpose_140)[name = string("qk_65_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_2366_cast_fp16 = softmax(axis = var_2210, x = qk_65_cast_fp16)[name = string("op_2366_cast_fp16")];
+            bool var_2368_transpose_x_0 = const()[name = string("op_2368_transpose_x_0"), val = bool(false)];
+            bool var_2368_transpose_y_0 = const()[name = string("op_2368_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_109_cast_fp16 = transpose(perm = var_2362, x = var_2361_cast_fp16)[name = string("transpose_156")];
+            tensor<fp16, [1, 12, ?, 64]> var_2368_cast_fp16 = matmul(transpose_x = var_2368_transpose_x_0, transpose_y = var_2368_transpose_y_0, x = var_2366_cast_fp16, y = v_109_cast_fp16)[name = string("op_2368_cast_fp16")];
+            tensor<int32, [4]> var_2369 = const()[name = string("op_2369"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_243x = const()[name = string("concat_243x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_2370_cast_fp16 = transpose(perm = var_2369, x = var_2368_cast_fp16)[name = string("transpose_153")];
+            tensor<fp16, [1, ?, 768]> x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2370_cast_fp16)[name = string("x_193_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2374_to_fp16 = const()[name = string("op_2374_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254381376)))];
+            tensor<fp16, [768]> var_2375_to_fp16 = const()[name = string("op_2375_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255561088)))];
+            tensor<fp16, [1, ?, 768]> linear_85_cast_fp16 = linear(bias = var_2375_to_fp16, weight = var_2374_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")];
+            tensor<int32, [1]> var_2382_axes_0 = const()[name = string("op_2382_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255562688)))];
+            tensor<fp16, [768]> blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255564288)))];
+            tensor<fp16, [1, ?, 768]> var_2382_cast_fp16 = layer_norm(axes = var_2382_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2216_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2382_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_2391_to_fp16 = const()[name = string("op_2391_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255565888)))];
+            tensor<fp16, [3072]> var_2392_to_fp16 = const()[name = string("op_2392_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260284544)))];
+            tensor<fp16, [1, ?, 3072]> linear_86_cast_fp16 = linear(bias = var_2392_to_fp16, weight = var_2391_to_fp16, x = var_2382_cast_fp16)[name = string("linear_86_cast_fp16")];
+            string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_2397_to_fp16 = const()[name = string("op_2397_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260290752)))];
+            tensor<fp16, [768]> var_2398_to_fp16 = const()[name = string("op_2398_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265009408)))];
+            tensor<fp16, [1, ?, 768]> linear_87_cast_fp16 = linear(bias = var_2398_to_fp16, weight = var_2397_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")];
+            tensor<int32, [4]> k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor<int32, [4]>([12, 1, 448, 768])];
+            tensor<bool, [4]> k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_44)[name = string("k_cache_45_cast_fp16")];
+            tensor<int32, [4]> v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor<int32, [4]>([12, 1, 448, 768])];
+            tensor<bool, [4]> v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 768]> v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_45)[name = string("v_cache_45_cast_fp16")];
+            tensor<int32, [4]> k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor<int32, [4]>([12, 1, 1500, 768])];
+            tensor<bool, [4]> k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")];
+            tensor<int32, [4]> v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor<int32, [4]>([12, 1, 1500, 768])];
+            tensor<bool, [4]> v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")];
+            int32 var_2421 = const()[name = string("op_2421"), val = int32(-1)];
+            tensor<int32, [1]> var_2439_axes_0 = const()[name = string("op_2439_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265011008)))];
+            tensor<fp16, [768]> blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265012608)))];
+            fp16 var_2427_to_fp16 = const()[name = string("op_2427_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_2439_cast_fp16 = layer_norm(axes = var_2439_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2427_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2439_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2450_to_fp16 = const()[name = string("op_2450_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265014208)))];
+            tensor<fp16, [768]> var_2451_to_fp16 = const()[name = string("op_2451_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266193920)))];
+            tensor<fp16, [1, ?, 768]> linear_88_cast_fp16 = linear(bias = var_2451_to_fp16, weight = var_2450_to_fp16, x = var_2439_cast_fp16)[name = string("linear_88_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2454_to_fp16 = const()[name = string("op_2454_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266195520)))];
+            tensor<fp16, [1, ?, 768]> linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2454_to_fp16, x = var_2439_cast_fp16)[name = string("linear_89_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2458_to_fp16 = const()[name = string("op_2458_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267375232)))];
+            tensor<fp16, [768]> var_2459_to_fp16 = const()[name = string("op_2459_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268554944)))];
+            tensor<fp16, [1, ?, 768]> linear_90_cast_fp16 = linear(bias = var_2459_to_fp16, weight = var_2458_to_fp16, x = var_2439_cast_fp16)[name = string("linear_90_cast_fp16")];
+            tensor<int32, [3]> var_2461_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2461_shape_cast_fp16")];
+            int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)];
+            int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)];
+            bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)];
+            string var_2461_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2461_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_2461_shape_cast_fp16_to_uint16 = cast(dtype = var_2461_shape_cast_fp16_to_uint16_dtype_0, x = var_2461_shape_cast_fp16)[name = string("cast_128")];
+            uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2461_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")];
+            string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_127")];
+            int32 end_step = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step")];
+            tensor<int32, [1]> expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step)[name = string("expand_dims_179")];
+            tensor<int32, [1]> concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor<int32, [1]>([11])];
+            int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)];
+            bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")];
+            tensor<int32, [1]> concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)];
+            bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_44)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_46_write_state")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [12, 1, 448, 768]> v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_45)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_47_write_state")];
+            int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)];
+            int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(768)];
+            int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)];
+            bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step, concat_252_values2_0))[name = string("concat_252")];
+            tensor<int32, [3]> var_2477_begin_0 = const()[name = string("op_2477_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2477_end_mask_0 = const()[name = string("op_2477_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_2477_cast_fp16 = slice_by_index(begin = var_2477_begin_0, end = concat_252, end_mask = var_2477_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2477_cast_fp16")];
+            tensor<int32, [3]> var_2480_begin_0 = const()[name = string("op_2480_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_2480_end_mask_0 = const()[name = string("op_2480_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 768]> var_2480_cast_fp16 = slice_by_index(begin = var_2480_begin_0, end = concat_252, end_mask = var_2480_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2480_cast_fp16")];
+            tensor<int32, [4]> concat_254x = const()[name = string("concat_254x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2490_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2490_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_104_to_fp16 = const()[name = string("const_104_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_91_cast_fp16 = mul(x = var_2490_cast_fp16, y = const_104_to_fp16)[name = string("q_91_cast_fp16")];
+            tensor<int32, [4]> concat_255x = const()[name = string("concat_255x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2497_cast_fp16 = reshape(shape = concat_255x, x = var_2477_cast_fp16)[name = string("op_2497_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_105_to_fp16 = const()[name = string("const_105_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> k_115_cast_fp16 = mul(x = var_2497_cast_fp16, y = const_105_to_fp16)[name = string("k_115_cast_fp16")];
+            tensor<int32, [4]> concat_256x = const()[name = string("concat_256x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2504_cast_fp16 = reshape(shape = concat_256x, x = var_2480_cast_fp16)[name = string("op_2504_cast_fp16")];
+            tensor<int32, [4]> var_2505 = const()[name = string("op_2505"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)];
+            bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_141_perm_0 = const()[name = string("transpose_141_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_142_perm_0 = const()[name = string("transpose_142_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, ?]> transpose_142 = transpose(perm = transpose_142_perm_0, x = k_115_cast_fp16)[name = string("transpose_150")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_141 = transpose(perm = transpose_141_perm_0, x = q_91_cast_fp16)[name = string("transpose_151")];
+            tensor<fp16, [1, 12, ?, ?]> qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_141, y = transpose_142)[name = string("qk_67_cast_fp16")];
+            int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)];
+            int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)];
+            bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")];
+            tensor<int32, [2]> var_2508_begin_0 = const()[name = string("op_2508_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2508_end_mask_0 = const()[name = string("op_2508_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_2508_cast_fp16 = slice_by_index(begin = var_2508_begin_0, end = concat_257, end_mask = var_2508_end_mask_0, x = mask_to_fp16)[name = string("op_2508_cast_fp16")];
+            int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)];
+            int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)];
+            bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")];
+            tensor<int32, [2]> var_2509_begin_0 = const()[name = string("op_2509_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_2509_end_mask_0 = const()[name = string("op_2509_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_2509_cast_fp16 = slice_by_index(begin = var_2509_begin_0, end = concat_258, end_mask = var_2509_end_mask_0, x = var_2508_cast_fp16)[name = string("op_2509_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2509_cast_fp16)[name = string("qk_69_cast_fp16")];
+            tensor<fp16, [1, 12, ?, ?]> var_2512_cast_fp16 = softmax(axis = var_2421, x = qk_69_cast_fp16)[name = string("op_2512_cast_fp16")];
+            bool var_2514_transpose_x_0 = const()[name = string("op_2514_transpose_x_0"), val = bool(false)];
+            bool var_2514_transpose_y_0 = const()[name = string("op_2514_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, ?, 64]> v_115_cast_fp16 = transpose(perm = var_2505, x = var_2504_cast_fp16)[name = string("transpose_152")];
+            tensor<fp16, [1, 12, ?, 64]> var_2514_cast_fp16 = matmul(transpose_x = var_2514_transpose_x_0, transpose_y = var_2514_transpose_y_0, x = var_2512_cast_fp16, y = v_115_cast_fp16)[name = string("op_2514_cast_fp16")];
+            tensor<int32, [4]> var_2515 = const()[name = string("op_2515"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_259x = const()[name = string("concat_259x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_2516_cast_fp16 = transpose(perm = var_2515, x = var_2514_cast_fp16)[name = string("transpose_149")];
+            tensor<fp16, [1, ?, 768]> x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2516_cast_fp16)[name = string("x_205_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2520_to_fp16 = const()[name = string("op_2520_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268556544)))];
+            tensor<fp16, [768]> var_2521_to_fp16 = const()[name = string("op_2521_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269736256)))];
+            tensor<fp16, [1, ?, 768]> linear_91_cast_fp16 = linear(bias = var_2521_to_fp16, weight = var_2520_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")];
+            tensor<int32, [1]> var_2528_axes_0 = const()[name = string("op_2528_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269737856)))];
+            tensor<fp16, [768]> blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269739456)))];
+            tensor<fp16, [1, ?, 768]> var_2528_cast_fp16 = layer_norm(axes = var_2528_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2427_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2528_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2537_to_fp16 = const()[name = string("op_2537_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269741056)))];
+            tensor<fp16, [768]> var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270920768)))];
+            tensor<fp16, [1, ?, 768]> linear_92_cast_fp16 = linear(bias = var_2538_to_fp16, weight = var_2537_to_fp16, x = var_2528_cast_fp16)[name = string("linear_92_cast_fp16")];
+            tensor<int32, [3]> concat_260 = const()[name = string("concat_260"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_261 = const()[name = string("concat_261"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_262 = const()[name = string("concat_262"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_263 = const()[name = string("concat_263"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 768]> v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_264x = const()[name = string("concat_264x"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, ?, 12, 64]> var_2558_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2558_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_106_to_fp16 = const()[name = string("const_106_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 12, 64]> q_cast_fp16 = mul(x = var_2558_cast_fp16, y = const_106_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_2564 = const()[name = string("op_2564"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_2565_cast_fp16 = reshape(shape = var_2564, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2565_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_107_to_fp16 = const()[name = string("const_107_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_cast_fp16 = mul(x = var_2565_cast_fp16, y = const_107_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_2571 = const()[name = string("op_2571"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_2572_cast_fp16 = reshape(shape = var_2571, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2572_cast_fp16")];
+            tensor<int32, [4]> var_2573 = const()[name = string("op_2573"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_143_perm_0 = const()[name = string("transpose_143_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_144_perm_0 = const()[name = string("transpose_144_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_144 = transpose(perm = transpose_144_perm_0, x = k_cast_fp16)[name = string("transpose_146")];
+            tensor<fp16, [1, 12, ?, 64]> transpose_143 = transpose(perm = transpose_143_perm_0, x = q_cast_fp16)[name = string("transpose_147")];
+            tensor<fp16, [1, 12, ?, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_143, y = transpose_144)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 12, ?, 1500]> var_2577_cast_fp16 = softmax(axis = var_2421, x = qk_cast_fp16)[name = string("op_2577_cast_fp16")];
+            bool var_2579_transpose_x_0 = const()[name = string("op_2579_transpose_x_0"), val = bool(false)];
+            bool var_2579_transpose_y_0 = const()[name = string("op_2579_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_cast_fp16 = transpose(perm = var_2573, x = var_2572_cast_fp16)[name = string("transpose_148")];
+            tensor<fp16, [1, 12, ?, 64]> var_2579_cast_fp16 = matmul(transpose_x = var_2579_transpose_x_0, transpose_y = var_2579_transpose_y_0, x = var_2577_cast_fp16, y = v_cast_fp16)[name = string("op_2579_cast_fp16")];
+            tensor<int32, [4]> var_2580 = const()[name = string("op_2580"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_265x = const()[name = string("concat_265x"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, ?, 12, 64]> var_2581_cast_fp16 = transpose(perm = var_2580, x = var_2579_cast_fp16)[name = string("transpose_145")];
+            tensor<fp16, [1, ?, 768]> x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2581_cast_fp16)[name = string("x_211_cast_fp16")];
+            tensor<fp16, [768, 768]> var_2585_to_fp16 = const()[name = string("op_2585_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270922368)))];
+            tensor<fp16, [768]> var_2586_to_fp16 = const()[name = string("op_2586_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272102080)))];
+            tensor<fp16, [1, ?, 768]> linear_93_cast_fp16 = linear(bias = var_2586_to_fp16, weight = var_2585_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")];
+            tensor<int32, [1]> var_2593_axes_0 = const()[name = string("op_2593_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272103680)))];
+            tensor<fp16, [768]> blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272105280)))];
+            tensor<fp16, [1, ?, 768]> var_2593_cast_fp16 = layer_norm(axes = var_2593_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2427_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2593_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_2602_to_fp16 = const()[name = string("op_2602_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272106880)))];
+            tensor<fp16, [3072]> var_2603_to_fp16 = const()[name = string("op_2603_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276825536)))];
+            tensor<fp16, [1, ?, 3072]> linear_94_cast_fp16 = linear(bias = var_2603_to_fp16, weight = var_2602_to_fp16, x = var_2593_cast_fp16)[name = string("linear_94_cast_fp16")];
+            string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 3072]> x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_2608_to_fp16 = const()[name = string("op_2608_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276831744)))];
+            tensor<fp16, [768]> var_2609_to_fp16 = const()[name = string("op_2609_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281550400)))];
+            tensor<fp16, [1, ?, 768]> linear_95_cast_fp16 = linear(bias = var_2609_to_fp16, weight = var_2608_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")];
+            tensor<fp16, [1, ?, 768]> x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")];
+            tensor<int32, [1]> var_2622_axes_0 = const()[name = string("op_2622_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281552000)))];
+            tensor<fp16, [768]> ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281553600)))];
+            fp16 var_2613_to_fp16 = const()[name = string("op_2613_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 768]> var_2622_cast_fp16 = layer_norm(axes = var_2622_axes_0, beta = ln_bias_to_fp16, epsilon = var_2613_to_fp16, gamma = ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2622_cast_fp16")];
+            tensor<fp16, [51865]> var_2632_bias_0_to_fp16 = const()[name = string("op_2632_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281555200)))];
+            tensor<fp16, [1, ?, 51865]> logits = linear(bias = var_2632_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_2622_cast_fp16)[name = string("op_2632_cast_fp16")];
+        } -> (logits);
+}
\ No newline at end of file
diff --git a/small/decoder_second.mlmodelc/weights/weight.bin b/small/decoder_second.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..90ca561c70632e65de0842117756ea468a571c60
--- /dev/null
+++ b/small/decoder_second.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3337ba112e828dc28889bc3f1f5a2dedddd2f25e867247ab569aa2ff8df7f4f
+size 281658994
diff --git a/small/encoder.mlmodelc/analytics/coremldata.bin b/small/encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f31b754669e1303c2372c9342aab1b63bd7cab93
--- /dev/null
+++ b/small/encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a641e986ed9ea8170754f3317c8f0b565bc752fd791d159468a3f6a62fe1757
+size 243
diff --git a/small/encoder.mlmodelc/coremldata.bin b/small/encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e06088a447412c3ca2fa17a18981aa7cb7a92bc7
--- /dev/null
+++ b/small/encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:268793fee4ed1a8616dc360502d6cafd32d4074c9a50651d775d02cc62fbc1a3
+size 318
diff --git a/small/encoder.mlmodelc/metadata.json b/small/encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..d4d4ea5de53e1dd5f75562828a806b102bcbd520
--- /dev/null
+++ b/small/encoder.mlmodelc/metadata.json
@@ -0,0 +1,69 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500 × 768)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 768]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.mul" : 24,
+      "Ios18.softmax" : 12,
+      "Ios18.linear" : 72,
+      "Ios18.gelu" : 14,
+      "Ios18.layerNorm" : 25,
+      "Ios18.transpose" : 49,
+      "Ios18.matmul" : 24,
+      "Ios18.conv" : 2,
+      "Ios18.add" : 25,
+      "Ios18.reshape" : 48
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "encoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/small/encoder.mlmodelc/model.mil b/small/encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..410fe63661e7fb031279aa0c89684ec2cc4eb33f
--- /dev/null
+++ b/small/encoder.mlmodelc/model.mil
@@ -0,0 +1,732 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 80, 3000]> logmel_data) {
+            string var_44_pad_type_0 = const()[name = string("op_44_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_44_pad_0 = const()[name = string("op_44_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_44_strides_0 = const()[name = string("op_44_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_44_dilations_0 = const()[name = string("op_44_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_44_groups_0 = const()[name = string("op_44_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 80, 3]> weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor<fp16, [768, 80, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [768]> bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368768)))];
+            tensor<fp16, [1, 768, 3000]> var_44_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_44_dilations_0, groups = var_44_groups_0, pad = var_44_pad_0, pad_type = var_44_pad_type_0, strides = var_44_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_44_cast_fp16")];
+            string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 768, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_44_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_62_pad_type_0 = const()[name = string("op_62_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_62_pad_0 = const()[name = string("op_62_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_62_strides_0 = const()[name = string("op_62_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_62_dilations_0 = const()[name = string("op_62_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_62_groups_0 = const()[name = string("op_62_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 3]> weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor<fp16, [768, 768, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370368)))];
+            tensor<fp16, [768]> bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3909376)))];
+            tensor<fp16, [1, 768, 1500]> var_62_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_62_dilations_0, groups = var_62_groups_0, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_62_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_62_cast_fp16")];
+            string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 768, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_62_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<int32, [3]> var_68 = const()[name = string("op_68"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [1500, 768]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [1500, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3910976)))];
+            tensor<fp16, [1, 1500, 768]> x_5_cast_fp16 = transpose(perm = var_68, x = x_3_cast_fp16)[name = string("transpose_120")];
+            tensor<fp16, [1, 1500, 768]> var_71_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_71_cast_fp16")];
+            int32 var_84 = const()[name = string("op_84"), val = int32(-1)];
+            tensor<int32, [1]> var_100_axes_0 = const()[name = string("op_100_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6215040)))];
+            tensor<fp16, [768]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6216640)))];
+            fp16 var_90_to_fp16 = const()[name = string("op_90_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_100_cast_fp16 = layer_norm(axes = var_100_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_90_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_71_cast_fp16)[name = string("op_100_cast_fp16")];
+            tensor<fp16, [768, 768]> var_111_to_fp16 = const()[name = string("op_111_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6218240)))];
+            tensor<fp16, [768]> var_112_to_fp16 = const()[name = string("op_112_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7397952)))];
+            tensor<fp16, [1, 1500, 768]> linear_0_cast_fp16 = linear(bias = var_112_to_fp16, weight = var_111_to_fp16, x = var_100_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [768, 768]> var_115_to_fp16 = const()[name = string("op_115_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7399552)))];
+            tensor<fp16, [768]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8579264)))];
+            tensor<fp16, [1, 1500, 768]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_115_to_fp16, x = var_100_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [768, 768]> var_119_to_fp16 = const()[name = string("op_119_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8580864)))];
+            tensor<fp16, [768]> var_120_to_fp16 = const()[name = string("op_120_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9760576)))];
+            tensor<fp16, [1, 1500, 768]> linear_2_cast_fp16 = linear(bias = var_120_to_fp16, weight = var_119_to_fp16, x = var_100_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_128 = const()[name = string("op_128"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_129_cast_fp16 = reshape(shape = var_128, x = linear_0_cast_fp16)[name = string("op_129_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_84_to_fp16 = const()[name = string("const_84_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_3_cast_fp16 = mul(x = var_129_cast_fp16, y = const_84_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> var_135 = const()[name = string("op_135"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_136_cast_fp16 = reshape(shape = var_135, x = linear_1_cast_fp16)[name = string("op_136_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_85_to_fp16 = const()[name = string("const_85_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_3_cast_fp16 = mul(x = var_136_cast_fp16, y = const_85_to_fp16)[name = string("k_3_cast_fp16")];
+            tensor<int32, [4]> var_142 = const()[name = string("op_142"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_143_cast_fp16 = reshape(shape = var_142, x = linear_2_cast_fp16)[name = string("op_143_cast_fp16")];
+            tensor<int32, [4]> var_144 = const()[name = string("op_144"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_49 = transpose(perm = transpose_49_perm_0, x = k_3_cast_fp16)[name = string("transpose_117")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_48 = transpose(perm = transpose_48_perm_0, x = q_3_cast_fp16)[name = string("transpose_118")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_48, y = transpose_49)[name = string("qk_1_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_148_cast_fp16 = softmax(axis = var_84, x = qk_1_cast_fp16)[name = string("op_148_cast_fp16")];
+            bool var_150_transpose_x_0 = const()[name = string("op_150_transpose_x_0"), val = bool(false)];
+            bool var_150_transpose_y_0 = const()[name = string("op_150_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_3_cast_fp16 = transpose(perm = var_144, x = var_143_cast_fp16)[name = string("transpose_119")];
+            tensor<fp16, [1, 12, 1500, 64]> var_150_cast_fp16 = matmul(transpose_x = var_150_transpose_x_0, transpose_y = var_150_transpose_y_0, x = var_148_cast_fp16, y = v_3_cast_fp16)[name = string("op_150_cast_fp16")];
+            tensor<int32, [4]> var_151 = const()[name = string("op_151"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_152_cast_fp16 = transpose(perm = var_151, x = var_150_cast_fp16)[name = string("transpose_116")];
+            tensor<fp16, [1, 1500, 768]> x_11_cast_fp16 = reshape(shape = concat_0, x = var_152_cast_fp16)[name = string("x_11_cast_fp16")];
+            tensor<fp16, [768, 768]> var_156_to_fp16 = const()[name = string("op_156_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9762176)))];
+            tensor<fp16, [768]> var_157_to_fp16 = const()[name = string("op_157_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10941888)))];
+            tensor<fp16, [1, 1500, 768]> linear_3_cast_fp16 = linear(bias = var_157_to_fp16, weight = var_156_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_13_cast_fp16 = add(x = var_71_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<int32, [1]> var_164_axes_0 = const()[name = string("op_164_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10943488)))];
+            tensor<fp16, [768]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10945088)))];
+            tensor<fp16, [1, 1500, 768]> var_164_cast_fp16 = layer_norm(axes = var_164_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_90_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_164_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_173_to_fp16 = const()[name = string("op_173_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10946688)))];
+            tensor<fp16, [3072]> var_174_to_fp16 = const()[name = string("op_174_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15665344)))];
+            tensor<fp16, [1, 1500, 3072]> linear_4_cast_fp16 = linear(bias = var_174_to_fp16, weight = var_173_to_fp16, x = var_164_cast_fp16)[name = string("linear_4_cast_fp16")];
+            string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15671552)))];
+            tensor<fp16, [768]> var_180_to_fp16 = const()[name = string("op_180_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20390208)))];
+            tensor<fp16, [1, 1500, 768]> linear_5_cast_fp16 = linear(bias = var_180_to_fp16, weight = var_179_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")];
+            int32 var_190 = const()[name = string("op_190"), val = int32(-1)];
+            tensor<int32, [1]> var_206_axes_0 = const()[name = string("op_206_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20391808)))];
+            tensor<fp16, [768]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20393408)))];
+            fp16 var_196_to_fp16 = const()[name = string("op_196_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_206_cast_fp16 = layer_norm(axes = var_206_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_196_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_206_cast_fp16")];
+            tensor<fp16, [768, 768]> var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20395008)))];
+            tensor<fp16, [768]> var_218_to_fp16 = const()[name = string("op_218_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21574720)))];
+            tensor<fp16, [1, 1500, 768]> linear_6_cast_fp16 = linear(bias = var_218_to_fp16, weight = var_217_to_fp16, x = var_206_cast_fp16)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [768, 768]> var_221_to_fp16 = const()[name = string("op_221_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21576320)))];
+            tensor<fp16, [1, 1500, 768]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_221_to_fp16, x = var_206_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [768, 768]> var_225_to_fp16 = const()[name = string("op_225_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22756032)))];
+            tensor<fp16, [768]> var_226_to_fp16 = const()[name = string("op_226_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23935744)))];
+            tensor<fp16, [1, 1500, 768]> linear_8_cast_fp16 = linear(bias = var_226_to_fp16, weight = var_225_to_fp16, x = var_206_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_234 = const()[name = string("op_234"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_235_cast_fp16 = reshape(shape = var_234, x = linear_6_cast_fp16)[name = string("op_235_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_86_to_fp16 = const()[name = string("const_86_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_7_cast_fp16 = mul(x = var_235_cast_fp16, y = const_86_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_241 = const()[name = string("op_241"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_242_cast_fp16 = reshape(shape = var_241, x = linear_7_cast_fp16)[name = string("op_242_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_7_cast_fp16 = mul(x = var_242_cast_fp16, y = const_87_to_fp16)[name = string("k_7_cast_fp16")];
+            tensor<int32, [4]> var_248 = const()[name = string("op_248"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_249_cast_fp16 = reshape(shape = var_248, x = linear_8_cast_fp16)[name = string("op_249_cast_fp16")];
+            tensor<int32, [4]> var_250 = const()[name = string("op_250"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)];
+            bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_51 = transpose(perm = transpose_51_perm_0, x = k_7_cast_fp16)[name = string("transpose_113")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_50 = transpose(perm = transpose_50_perm_0, x = q_7_cast_fp16)[name = string("transpose_114")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_50, y = transpose_51)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_254_cast_fp16 = softmax(axis = var_190, x = qk_3_cast_fp16)[name = string("op_254_cast_fp16")];
+            bool var_256_transpose_x_0 = const()[name = string("op_256_transpose_x_0"), val = bool(false)];
+            bool var_256_transpose_y_0 = const()[name = string("op_256_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_7_cast_fp16 = transpose(perm = var_250, x = var_249_cast_fp16)[name = string("transpose_115")];
+            tensor<fp16, [1, 12, 1500, 64]> var_256_cast_fp16 = matmul(transpose_x = var_256_transpose_x_0, transpose_y = var_256_transpose_y_0, x = var_254_cast_fp16, y = v_7_cast_fp16)[name = string("op_256_cast_fp16")];
+            tensor<int32, [4]> var_257 = const()[name = string("op_257"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_258_cast_fp16 = transpose(perm = var_257, x = var_256_cast_fp16)[name = string("transpose_112")];
+            tensor<fp16, [1, 1500, 768]> x_23_cast_fp16 = reshape(shape = concat_1, x = var_258_cast_fp16)[name = string("x_23_cast_fp16")];
+            tensor<fp16, [768, 768]> var_262_to_fp16 = const()[name = string("op_262_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23937344)))];
+            tensor<fp16, [768]> var_263_to_fp16 = const()[name = string("op_263_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25117056)))];
+            tensor<fp16, [1, 1500, 768]> linear_9_cast_fp16 = linear(bias = var_263_to_fp16, weight = var_262_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<int32, [1]> var_270_axes_0 = const()[name = string("op_270_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25118656)))];
+            tensor<fp16, [768]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25120256)))];
+            tensor<fp16, [1, 1500, 768]> var_270_cast_fp16 = layer_norm(axes = var_270_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_196_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_270_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25121856)))];
+            tensor<fp16, [3072]> var_280_to_fp16 = const()[name = string("op_280_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29840512)))];
+            tensor<fp16, [1, 1500, 3072]> linear_10_cast_fp16 = linear(bias = var_280_to_fp16, weight = var_279_to_fp16, x = var_270_cast_fp16)[name = string("linear_10_cast_fp16")];
+            string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_285_to_fp16 = const()[name = string("op_285_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29846720)))];
+            tensor<fp16, [768]> var_286_to_fp16 = const()[name = string("op_286_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34565376)))];
+            tensor<fp16, [1, 1500, 768]> linear_11_cast_fp16 = linear(bias = var_286_to_fp16, weight = var_285_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")];
+            int32 var_296 = const()[name = string("op_296"), val = int32(-1)];
+            tensor<int32, [1]> var_312_axes_0 = const()[name = string("op_312_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34566976)))];
+            tensor<fp16, [768]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34568576)))];
+            fp16 var_302_to_fp16 = const()[name = string("op_302_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_312_cast_fp16 = layer_norm(axes = var_312_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_302_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_312_cast_fp16")];
+            tensor<fp16, [768, 768]> var_323_to_fp16 = const()[name = string("op_323_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34570176)))];
+            tensor<fp16, [768]> var_324_to_fp16 = const()[name = string("op_324_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35749888)))];
+            tensor<fp16, [1, 1500, 768]> linear_12_cast_fp16 = linear(bias = var_324_to_fp16, weight = var_323_to_fp16, x = var_312_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [768, 768]> var_327_to_fp16 = const()[name = string("op_327_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35751488)))];
+            tensor<fp16, [1, 1500, 768]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_327_to_fp16, x = var_312_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [768, 768]> var_331_to_fp16 = const()[name = string("op_331_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36931200)))];
+            tensor<fp16, [768]> var_332_to_fp16 = const()[name = string("op_332_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38110912)))];
+            tensor<fp16, [1, 1500, 768]> linear_14_cast_fp16 = linear(bias = var_332_to_fp16, weight = var_331_to_fp16, x = var_312_cast_fp16)[name = string("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_340 = const()[name = string("op_340"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_341_cast_fp16 = reshape(shape = var_340, x = linear_12_cast_fp16)[name = string("op_341_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_11_cast_fp16 = mul(x = var_341_cast_fp16, y = const_88_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> var_347 = const()[name = string("op_347"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_348_cast_fp16 = reshape(shape = var_347, x = linear_13_cast_fp16)[name = string("op_348_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_89_to_fp16 = const()[name = string("const_89_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_11_cast_fp16 = mul(x = var_348_cast_fp16, y = const_89_to_fp16)[name = string("k_11_cast_fp16")];
+            tensor<int32, [4]> var_354 = const()[name = string("op_354"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_355_cast_fp16 = reshape(shape = var_354, x = linear_14_cast_fp16)[name = string("op_355_cast_fp16")];
+            tensor<int32, [4]> var_356 = const()[name = string("op_356"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_53 = transpose(perm = transpose_53_perm_0, x = k_11_cast_fp16)[name = string("transpose_109")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_52 = transpose(perm = transpose_52_perm_0, x = q_11_cast_fp16)[name = string("transpose_110")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_52, y = transpose_53)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_360_cast_fp16 = softmax(axis = var_296, x = qk_5_cast_fp16)[name = string("op_360_cast_fp16")];
+            bool var_362_transpose_x_0 = const()[name = string("op_362_transpose_x_0"), val = bool(false)];
+            bool var_362_transpose_y_0 = const()[name = string("op_362_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_11_cast_fp16 = transpose(perm = var_356, x = var_355_cast_fp16)[name = string("transpose_111")];
+            tensor<fp16, [1, 12, 1500, 64]> var_362_cast_fp16 = matmul(transpose_x = var_362_transpose_x_0, transpose_y = var_362_transpose_y_0, x = var_360_cast_fp16, y = v_11_cast_fp16)[name = string("op_362_cast_fp16")];
+            tensor<int32, [4]> var_363 = const()[name = string("op_363"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_364_cast_fp16 = transpose(perm = var_363, x = var_362_cast_fp16)[name = string("transpose_108")];
+            tensor<fp16, [1, 1500, 768]> x_35_cast_fp16 = reshape(shape = concat_2, x = var_364_cast_fp16)[name = string("x_35_cast_fp16")];
+            tensor<fp16, [768, 768]> var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38112512)))];
+            tensor<fp16, [768]> var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39292224)))];
+            tensor<fp16, [1, 1500, 768]> linear_15_cast_fp16 = linear(bias = var_369_to_fp16, weight = var_368_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<int32, [1]> var_376_axes_0 = const()[name = string("op_376_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39293824)))];
+            tensor<fp16, [768]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39295424)))];
+            tensor<fp16, [1, 1500, 768]> var_376_cast_fp16 = layer_norm(axes = var_376_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_302_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_376_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_385_to_fp16 = const()[name = string("op_385_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39297024)))];
+            tensor<fp16, [3072]> var_386_to_fp16 = const()[name = string("op_386_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44015680)))];
+            tensor<fp16, [1, 1500, 3072]> linear_16_cast_fp16 = linear(bias = var_386_to_fp16, weight = var_385_to_fp16, x = var_376_cast_fp16)[name = string("linear_16_cast_fp16")];
+            string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_391_to_fp16 = const()[name = string("op_391_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44021888)))];
+            tensor<fp16, [768]> var_392_to_fp16 = const()[name = string("op_392_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48740544)))];
+            tensor<fp16, [1, 1500, 768]> linear_17_cast_fp16 = linear(bias = var_392_to_fp16, weight = var_391_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")];
+            int32 var_402 = const()[name = string("op_402"), val = int32(-1)];
+            tensor<int32, [1]> var_418_axes_0 = const()[name = string("op_418_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48742144)))];
+            tensor<fp16, [768]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48743744)))];
+            fp16 var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_418_cast_fp16 = layer_norm(axes = var_418_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_408_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_418_cast_fp16")];
+            tensor<fp16, [768, 768]> var_429_to_fp16 = const()[name = string("op_429_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48745344)))];
+            tensor<fp16, [768]> var_430_to_fp16 = const()[name = string("op_430_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49925056)))];
+            tensor<fp16, [1, 1500, 768]> linear_18_cast_fp16 = linear(bias = var_430_to_fp16, weight = var_429_to_fp16, x = var_418_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [768, 768]> var_433_to_fp16 = const()[name = string("op_433_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49926656)))];
+            tensor<fp16, [1, 1500, 768]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_433_to_fp16, x = var_418_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [768, 768]> var_437_to_fp16 = const()[name = string("op_437_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51106368)))];
+            tensor<fp16, [768]> var_438_to_fp16 = const()[name = string("op_438_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52286080)))];
+            tensor<fp16, [1, 1500, 768]> linear_20_cast_fp16 = linear(bias = var_438_to_fp16, weight = var_437_to_fp16, x = var_418_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_446 = const()[name = string("op_446"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_447_cast_fp16 = reshape(shape = var_446, x = linear_18_cast_fp16)[name = string("op_447_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_15_cast_fp16 = mul(x = var_447_cast_fp16, y = const_90_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_453 = const()[name = string("op_453"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_454_cast_fp16 = reshape(shape = var_453, x = linear_19_cast_fp16)[name = string("op_454_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_91_to_fp16 = const()[name = string("const_91_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_15_cast_fp16 = mul(x = var_454_cast_fp16, y = const_91_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> var_460 = const()[name = string("op_460"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_461_cast_fp16 = reshape(shape = var_460, x = linear_20_cast_fp16)[name = string("op_461_cast_fp16")];
+            tensor<int32, [4]> var_462 = const()[name = string("op_462"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_55 = transpose(perm = transpose_55_perm_0, x = k_15_cast_fp16)[name = string("transpose_105")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_54 = transpose(perm = transpose_54_perm_0, x = q_15_cast_fp16)[name = string("transpose_106")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_54, y = transpose_55)[name = string("qk_7_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_466_cast_fp16 = softmax(axis = var_402, x = qk_7_cast_fp16)[name = string("op_466_cast_fp16")];
+            bool var_468_transpose_x_0 = const()[name = string("op_468_transpose_x_0"), val = bool(false)];
+            bool var_468_transpose_y_0 = const()[name = string("op_468_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_15_cast_fp16 = transpose(perm = var_462, x = var_461_cast_fp16)[name = string("transpose_107")];
+            tensor<fp16, [1, 12, 1500, 64]> var_468_cast_fp16 = matmul(transpose_x = var_468_transpose_x_0, transpose_y = var_468_transpose_y_0, x = var_466_cast_fp16, y = v_15_cast_fp16)[name = string("op_468_cast_fp16")];
+            tensor<int32, [4]> var_469 = const()[name = string("op_469"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_470_cast_fp16 = transpose(perm = var_469, x = var_468_cast_fp16)[name = string("transpose_104")];
+            tensor<fp16, [1, 1500, 768]> x_47_cast_fp16 = reshape(shape = concat_3, x = var_470_cast_fp16)[name = string("x_47_cast_fp16")];
+            tensor<fp16, [768, 768]> var_474_to_fp16 = const()[name = string("op_474_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52287680)))];
+            tensor<fp16, [768]> var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53467392)))];
+            tensor<fp16, [1, 1500, 768]> linear_21_cast_fp16 = linear(bias = var_475_to_fp16, weight = var_474_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<int32, [1]> var_482_axes_0 = const()[name = string("op_482_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53468992)))];
+            tensor<fp16, [768]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53470592)))];
+            tensor<fp16, [1, 1500, 768]> var_482_cast_fp16 = layer_norm(axes = var_482_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_408_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_482_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_491_to_fp16 = const()[name = string("op_491_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53472192)))];
+            tensor<fp16, [3072]> var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58190848)))];
+            tensor<fp16, [1, 1500, 3072]> linear_22_cast_fp16 = linear(bias = var_492_to_fp16, weight = var_491_to_fp16, x = var_482_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_497_to_fp16 = const()[name = string("op_497_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58197056)))];
+            tensor<fp16, [768]> var_498_to_fp16 = const()[name = string("op_498_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62915712)))];
+            tensor<fp16, [1, 1500, 768]> linear_23_cast_fp16 = linear(bias = var_498_to_fp16, weight = var_497_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")];
+            int32 var_508 = const()[name = string("op_508"), val = int32(-1)];
+            tensor<int32, [1]> var_524_axes_0 = const()[name = string("op_524_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62917312)))];
+            tensor<fp16, [768]> blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62918912)))];
+            fp16 var_514_to_fp16 = const()[name = string("op_514_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_524_cast_fp16 = layer_norm(axes = var_524_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_514_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_524_cast_fp16")];
+            tensor<fp16, [768, 768]> var_535_to_fp16 = const()[name = string("op_535_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62920512)))];
+            tensor<fp16, [768]> var_536_to_fp16 = const()[name = string("op_536_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64100224)))];
+            tensor<fp16, [1, 1500, 768]> linear_24_cast_fp16 = linear(bias = var_536_to_fp16, weight = var_535_to_fp16, x = var_524_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [768, 768]> var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64101824)))];
+            tensor<fp16, [1, 1500, 768]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_539_to_fp16, x = var_524_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [768, 768]> var_543_to_fp16 = const()[name = string("op_543_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65281536)))];
+            tensor<fp16, [768]> var_544_to_fp16 = const()[name = string("op_544_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66461248)))];
+            tensor<fp16, [1, 1500, 768]> linear_26_cast_fp16 = linear(bias = var_544_to_fp16, weight = var_543_to_fp16, x = var_524_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [4]> var_552 = const()[name = string("op_552"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_553_cast_fp16 = reshape(shape = var_552, x = linear_24_cast_fp16)[name = string("op_553_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_92_to_fp16 = const()[name = string("const_92_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_19_cast_fp16 = mul(x = var_553_cast_fp16, y = const_92_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> var_559 = const()[name = string("op_559"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_560_cast_fp16 = reshape(shape = var_559, x = linear_25_cast_fp16)[name = string("op_560_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_93_to_fp16 = const()[name = string("const_93_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_19_cast_fp16 = mul(x = var_560_cast_fp16, y = const_93_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_566 = const()[name = string("op_566"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_567_cast_fp16 = reshape(shape = var_566, x = linear_26_cast_fp16)[name = string("op_567_cast_fp16")];
+            tensor<int32, [4]> var_568 = const()[name = string("op_568"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)];
+            bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_57 = transpose(perm = transpose_57_perm_0, x = k_19_cast_fp16)[name = string("transpose_101")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_56 = transpose(perm = transpose_56_perm_0, x = q_19_cast_fp16)[name = string("transpose_102")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_56, y = transpose_57)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_572_cast_fp16 = softmax(axis = var_508, x = qk_9_cast_fp16)[name = string("op_572_cast_fp16")];
+            bool var_574_transpose_x_0 = const()[name = string("op_574_transpose_x_0"), val = bool(false)];
+            bool var_574_transpose_y_0 = const()[name = string("op_574_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_568, x = var_567_cast_fp16)[name = string("transpose_103")];
+            tensor<fp16, [1, 12, 1500, 64]> var_574_cast_fp16 = matmul(transpose_x = var_574_transpose_x_0, transpose_y = var_574_transpose_y_0, x = var_572_cast_fp16, y = v_19_cast_fp16)[name = string("op_574_cast_fp16")];
+            tensor<int32, [4]> var_575 = const()[name = string("op_575"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_4 = const()[name = string("concat_4"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_576_cast_fp16 = transpose(perm = var_575, x = var_574_cast_fp16)[name = string("transpose_100")];
+            tensor<fp16, [1, 1500, 768]> x_59_cast_fp16 = reshape(shape = concat_4, x = var_576_cast_fp16)[name = string("x_59_cast_fp16")];
+            tensor<fp16, [768, 768]> var_580_to_fp16 = const()[name = string("op_580_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66462848)))];
+            tensor<fp16, [768]> var_581_to_fp16 = const()[name = string("op_581_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67642560)))];
+            tensor<fp16, [1, 1500, 768]> linear_27_cast_fp16 = linear(bias = var_581_to_fp16, weight = var_580_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<int32, [1]> var_588_axes_0 = const()[name = string("op_588_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67644160)))];
+            tensor<fp16, [768]> blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67645760)))];
+            tensor<fp16, [1, 1500, 768]> var_588_cast_fp16 = layer_norm(axes = var_588_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_514_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_588_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_597_to_fp16 = const()[name = string("op_597_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67647360)))];
+            tensor<fp16, [3072]> var_598_to_fp16 = const()[name = string("op_598_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72366016)))];
+            tensor<fp16, [1, 1500, 3072]> linear_28_cast_fp16 = linear(bias = var_598_to_fp16, weight = var_597_to_fp16, x = var_588_cast_fp16)[name = string("linear_28_cast_fp16")];
+            string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72372224)))];
+            tensor<fp16, [768]> var_604_to_fp16 = const()[name = string("op_604_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77090880)))];
+            tensor<fp16, [1, 1500, 768]> linear_29_cast_fp16 = linear(bias = var_604_to_fp16, weight = var_603_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")];
+            int32 var_614 = const()[name = string("op_614"), val = int32(-1)];
+            tensor<int32, [1]> var_630_axes_0 = const()[name = string("op_630_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77092480)))];
+            tensor<fp16, [768]> blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77094080)))];
+            fp16 var_620_to_fp16 = const()[name = string("op_620_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_630_cast_fp16 = layer_norm(axes = var_630_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_620_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_630_cast_fp16")];
+            tensor<fp16, [768, 768]> var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77095680)))];
+            tensor<fp16, [768]> var_642_to_fp16 = const()[name = string("op_642_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78275392)))];
+            tensor<fp16, [1, 1500, 768]> linear_30_cast_fp16 = linear(bias = var_642_to_fp16, weight = var_641_to_fp16, x = var_630_cast_fp16)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [768, 768]> var_645_to_fp16 = const()[name = string("op_645_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78276992)))];
+            tensor<fp16, [1, 1500, 768]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_645_to_fp16, x = var_630_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [768, 768]> var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79456704)))];
+            tensor<fp16, [768]> var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80636416)))];
+            tensor<fp16, [1, 1500, 768]> linear_32_cast_fp16 = linear(bias = var_650_to_fp16, weight = var_649_to_fp16, x = var_630_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<int32, [4]> var_658 = const()[name = string("op_658"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_659_cast_fp16 = reshape(shape = var_658, x = linear_30_cast_fp16)[name = string("op_659_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_94_to_fp16 = const()[name = string("const_94_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_23_cast_fp16 = mul(x = var_659_cast_fp16, y = const_94_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_665 = const()[name = string("op_665"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_666_cast_fp16 = reshape(shape = var_665, x = linear_31_cast_fp16)[name = string("op_666_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_95_to_fp16 = const()[name = string("const_95_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_23_cast_fp16 = mul(x = var_666_cast_fp16, y = const_95_to_fp16)[name = string("k_23_cast_fp16")];
+            tensor<int32, [4]> var_672 = const()[name = string("op_672"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_673_cast_fp16 = reshape(shape = var_672, x = linear_32_cast_fp16)[name = string("op_673_cast_fp16")];
+            tensor<int32, [4]> var_674 = const()[name = string("op_674"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_59 = transpose(perm = transpose_59_perm_0, x = k_23_cast_fp16)[name = string("transpose_97")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_58 = transpose(perm = transpose_58_perm_0, x = q_23_cast_fp16)[name = string("transpose_98")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_58, y = transpose_59)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_678_cast_fp16 = softmax(axis = var_614, x = qk_11_cast_fp16)[name = string("op_678_cast_fp16")];
+            bool var_680_transpose_x_0 = const()[name = string("op_680_transpose_x_0"), val = bool(false)];
+            bool var_680_transpose_y_0 = const()[name = string("op_680_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_23_cast_fp16 = transpose(perm = var_674, x = var_673_cast_fp16)[name = string("transpose_99")];
+            tensor<fp16, [1, 12, 1500, 64]> var_680_cast_fp16 = matmul(transpose_x = var_680_transpose_x_0, transpose_y = var_680_transpose_y_0, x = var_678_cast_fp16, y = v_23_cast_fp16)[name = string("op_680_cast_fp16")];
+            tensor<int32, [4]> var_681 = const()[name = string("op_681"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_682_cast_fp16 = transpose(perm = var_681, x = var_680_cast_fp16)[name = string("transpose_96")];
+            tensor<fp16, [1, 1500, 768]> x_71_cast_fp16 = reshape(shape = concat_5, x = var_682_cast_fp16)[name = string("x_71_cast_fp16")];
+            tensor<fp16, [768, 768]> var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80638016)))];
+            tensor<fp16, [768]> var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81817728)))];
+            tensor<fp16, [1, 1500, 768]> linear_33_cast_fp16 = linear(bias = var_687_to_fp16, weight = var_686_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<int32, [1]> var_694_axes_0 = const()[name = string("op_694_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81819328)))];
+            tensor<fp16, [768]> blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81820928)))];
+            tensor<fp16, [1, 1500, 768]> var_694_cast_fp16 = layer_norm(axes = var_694_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_620_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_694_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_703_to_fp16 = const()[name = string("op_703_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81822528)))];
+            tensor<fp16, [3072]> var_704_to_fp16 = const()[name = string("op_704_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86541184)))];
+            tensor<fp16, [1, 1500, 3072]> linear_34_cast_fp16 = linear(bias = var_704_to_fp16, weight = var_703_to_fp16, x = var_694_cast_fp16)[name = string("linear_34_cast_fp16")];
+            string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_709_to_fp16 = const()[name = string("op_709_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86547392)))];
+            tensor<fp16, [768]> var_710_to_fp16 = const()[name = string("op_710_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91266048)))];
+            tensor<fp16, [1, 1500, 768]> linear_35_cast_fp16 = linear(bias = var_710_to_fp16, weight = var_709_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_79_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_79_cast_fp16")];
+            int32 var_720 = const()[name = string("op_720"), val = int32(-1)];
+            tensor<int32, [1]> var_736_axes_0 = const()[name = string("op_736_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91267648)))];
+            tensor<fp16, [768]> blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91269248)))];
+            fp16 var_726_to_fp16 = const()[name = string("op_726_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_736_cast_fp16 = layer_norm(axes = var_736_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_726_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast_fp16)[name = string("op_736_cast_fp16")];
+            tensor<fp16, [768, 768]> var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91270848)))];
+            tensor<fp16, [768]> var_748_to_fp16 = const()[name = string("op_748_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92450560)))];
+            tensor<fp16, [1, 1500, 768]> linear_36_cast_fp16 = linear(bias = var_748_to_fp16, weight = var_747_to_fp16, x = var_736_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [768, 768]> var_751_to_fp16 = const()[name = string("op_751_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92452160)))];
+            tensor<fp16, [1, 1500, 768]> linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_751_to_fp16, x = var_736_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<fp16, [768, 768]> var_755_to_fp16 = const()[name = string("op_755_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93631872)))];
+            tensor<fp16, [768]> var_756_to_fp16 = const()[name = string("op_756_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94811584)))];
+            tensor<fp16, [1, 1500, 768]> linear_38_cast_fp16 = linear(bias = var_756_to_fp16, weight = var_755_to_fp16, x = var_736_cast_fp16)[name = string("linear_38_cast_fp16")];
+            tensor<int32, [4]> var_764 = const()[name = string("op_764"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_765_cast_fp16 = reshape(shape = var_764, x = linear_36_cast_fp16)[name = string("op_765_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_96_to_fp16 = const()[name = string("const_96_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_27_cast_fp16 = mul(x = var_765_cast_fp16, y = const_96_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> var_771 = const()[name = string("op_771"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_772_cast_fp16 = reshape(shape = var_771, x = linear_37_cast_fp16)[name = string("op_772_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_97_to_fp16 = const()[name = string("const_97_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_27_cast_fp16 = mul(x = var_772_cast_fp16, y = const_97_to_fp16)[name = string("k_27_cast_fp16")];
+            tensor<int32, [4]> var_778 = const()[name = string("op_778"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_779_cast_fp16 = reshape(shape = var_778, x = linear_38_cast_fp16)[name = string("op_779_cast_fp16")];
+            tensor<int32, [4]> var_780 = const()[name = string("op_780"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_61 = transpose(perm = transpose_61_perm_0, x = k_27_cast_fp16)[name = string("transpose_93")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_60 = transpose(perm = transpose_60_perm_0, x = q_27_cast_fp16)[name = string("transpose_94")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_60, y = transpose_61)[name = string("qk_13_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_784_cast_fp16 = softmax(axis = var_720, x = qk_13_cast_fp16)[name = string("op_784_cast_fp16")];
+            bool var_786_transpose_x_0 = const()[name = string("op_786_transpose_x_0"), val = bool(false)];
+            bool var_786_transpose_y_0 = const()[name = string("op_786_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_27_cast_fp16 = transpose(perm = var_780, x = var_779_cast_fp16)[name = string("transpose_95")];
+            tensor<fp16, [1, 12, 1500, 64]> var_786_cast_fp16 = matmul(transpose_x = var_786_transpose_x_0, transpose_y = var_786_transpose_y_0, x = var_784_cast_fp16, y = v_27_cast_fp16)[name = string("op_786_cast_fp16")];
+            tensor<int32, [4]> var_787 = const()[name = string("op_787"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_6 = const()[name = string("concat_6"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_788_cast_fp16 = transpose(perm = var_787, x = var_786_cast_fp16)[name = string("transpose_92")];
+            tensor<fp16, [1, 1500, 768]> x_83_cast_fp16 = reshape(shape = concat_6, x = var_788_cast_fp16)[name = string("x_83_cast_fp16")];
+            tensor<fp16, [768, 768]> var_792_to_fp16 = const()[name = string("op_792_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94813184)))];
+            tensor<fp16, [768]> var_793_to_fp16 = const()[name = string("op_793_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95992896)))];
+            tensor<fp16, [1, 1500, 768]> linear_39_cast_fp16 = linear(bias = var_793_to_fp16, weight = var_792_to_fp16, x = x_83_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_85_cast_fp16 = add(x = x_79_cast_fp16, y = linear_39_cast_fp16)[name = string("x_85_cast_fp16")];
+            tensor<int32, [1]> var_800_axes_0 = const()[name = string("op_800_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95994496)))];
+            tensor<fp16, [768]> blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95996096)))];
+            tensor<fp16, [1, 1500, 768]> var_800_cast_fp16 = layer_norm(axes = var_800_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_726_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast_fp16)[name = string("op_800_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_809_to_fp16 = const()[name = string("op_809_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95997696)))];
+            tensor<fp16, [3072]> var_810_to_fp16 = const()[name = string("op_810_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100716352)))];
+            tensor<fp16, [1, 1500, 3072]> linear_40_cast_fp16 = linear(bias = var_810_to_fp16, weight = var_809_to_fp16, x = var_800_cast_fp16)[name = string("linear_40_cast_fp16")];
+            string x_89_mode_0 = const()[name = string("x_89_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_89_cast_fp16 = gelu(mode = x_89_mode_0, x = linear_40_cast_fp16)[name = string("x_89_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_815_to_fp16 = const()[name = string("op_815_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100722560)))];
+            tensor<fp16, [768]> var_816_to_fp16 = const()[name = string("op_816_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105441216)))];
+            tensor<fp16, [1, 1500, 768]> linear_41_cast_fp16 = linear(bias = var_816_to_fp16, weight = var_815_to_fp16, x = x_89_cast_fp16)[name = string("linear_41_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_91_cast_fp16 = add(x = x_85_cast_fp16, y = linear_41_cast_fp16)[name = string("x_91_cast_fp16")];
+            int32 var_826 = const()[name = string("op_826"), val = int32(-1)];
+            tensor<int32, [1]> var_842_axes_0 = const()[name = string("op_842_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105442816)))];
+            tensor<fp16, [768]> blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105444416)))];
+            fp16 var_832_to_fp16 = const()[name = string("op_832_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_842_cast_fp16 = layer_norm(axes = var_842_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_832_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast_fp16)[name = string("op_842_cast_fp16")];
+            tensor<fp16, [768, 768]> var_853_to_fp16 = const()[name = string("op_853_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105446016)))];
+            tensor<fp16, [768]> var_854_to_fp16 = const()[name = string("op_854_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106625728)))];
+            tensor<fp16, [1, 1500, 768]> linear_42_cast_fp16 = linear(bias = var_854_to_fp16, weight = var_853_to_fp16, x = var_842_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [768, 768]> var_857_to_fp16 = const()[name = string("op_857_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106627328)))];
+            tensor<fp16, [1, 1500, 768]> linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_857_to_fp16, x = var_842_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<fp16, [768, 768]> var_861_to_fp16 = const()[name = string("op_861_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107807040)))];
+            tensor<fp16, [768]> var_862_to_fp16 = const()[name = string("op_862_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108986752)))];
+            tensor<fp16, [1, 1500, 768]> linear_44_cast_fp16 = linear(bias = var_862_to_fp16, weight = var_861_to_fp16, x = var_842_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [4]> var_870 = const()[name = string("op_870"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_871_cast_fp16 = reshape(shape = var_870, x = linear_42_cast_fp16)[name = string("op_871_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_98_to_fp16 = const()[name = string("const_98_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_31_cast_fp16 = mul(x = var_871_cast_fp16, y = const_98_to_fp16)[name = string("q_31_cast_fp16")];
+            tensor<int32, [4]> var_877 = const()[name = string("op_877"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_878_cast_fp16 = reshape(shape = var_877, x = linear_43_cast_fp16)[name = string("op_878_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_31_cast_fp16 = mul(x = var_878_cast_fp16, y = const_99_to_fp16)[name = string("k_31_cast_fp16")];
+            tensor<int32, [4]> var_884 = const()[name = string("op_884"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_885_cast_fp16 = reshape(shape = var_884, x = linear_44_cast_fp16)[name = string("op_885_cast_fp16")];
+            tensor<int32, [4]> var_886 = const()[name = string("op_886"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_15_transpose_x_0 = const()[name = string("qk_15_transpose_x_0"), val = bool(false)];
+            bool qk_15_transpose_y_0 = const()[name = string("qk_15_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_63 = transpose(perm = transpose_63_perm_0, x = k_31_cast_fp16)[name = string("transpose_89")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_62 = transpose(perm = transpose_62_perm_0, x = q_31_cast_fp16)[name = string("transpose_90")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_15_cast_fp16 = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_62, y = transpose_63)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_890_cast_fp16 = softmax(axis = var_826, x = qk_15_cast_fp16)[name = string("op_890_cast_fp16")];
+            bool var_892_transpose_x_0 = const()[name = string("op_892_transpose_x_0"), val = bool(false)];
+            bool var_892_transpose_y_0 = const()[name = string("op_892_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_31_cast_fp16 = transpose(perm = var_886, x = var_885_cast_fp16)[name = string("transpose_91")];
+            tensor<fp16, [1, 12, 1500, 64]> var_892_cast_fp16 = matmul(transpose_x = var_892_transpose_x_0, transpose_y = var_892_transpose_y_0, x = var_890_cast_fp16, y = v_31_cast_fp16)[name = string("op_892_cast_fp16")];
+            tensor<int32, [4]> var_893 = const()[name = string("op_893"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_7 = const()[name = string("concat_7"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_894_cast_fp16 = transpose(perm = var_893, x = var_892_cast_fp16)[name = string("transpose_88")];
+            tensor<fp16, [1, 1500, 768]> x_95_cast_fp16 = reshape(shape = concat_7, x = var_894_cast_fp16)[name = string("x_95_cast_fp16")];
+            tensor<fp16, [768, 768]> var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108988352)))];
+            tensor<fp16, [768]> var_899_to_fp16 = const()[name = string("op_899_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110168064)))];
+            tensor<fp16, [1, 1500, 768]> linear_45_cast_fp16 = linear(bias = var_899_to_fp16, weight = var_898_to_fp16, x = x_95_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_97_cast_fp16 = add(x = x_91_cast_fp16, y = linear_45_cast_fp16)[name = string("x_97_cast_fp16")];
+            tensor<int32, [1]> var_906_axes_0 = const()[name = string("op_906_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110169664)))];
+            tensor<fp16, [768]> blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110171264)))];
+            tensor<fp16, [1, 1500, 768]> var_906_cast_fp16 = layer_norm(axes = var_906_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_832_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast_fp16)[name = string("op_906_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_915_to_fp16 = const()[name = string("op_915_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110172864)))];
+            tensor<fp16, [3072]> var_916_to_fp16 = const()[name = string("op_916_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114891520)))];
+            tensor<fp16, [1, 1500, 3072]> linear_46_cast_fp16 = linear(bias = var_916_to_fp16, weight = var_915_to_fp16, x = var_906_cast_fp16)[name = string("linear_46_cast_fp16")];
+            string x_101_mode_0 = const()[name = string("x_101_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_101_cast_fp16 = gelu(mode = x_101_mode_0, x = linear_46_cast_fp16)[name = string("x_101_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114897728)))];
+            tensor<fp16, [768]> var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119616384)))];
+            tensor<fp16, [1, 1500, 768]> linear_47_cast_fp16 = linear(bias = var_922_to_fp16, weight = var_921_to_fp16, x = x_101_cast_fp16)[name = string("linear_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_47_cast_fp16)[name = string("x_103_cast_fp16")];
+            int32 var_932 = const()[name = string("op_932"), val = int32(-1)];
+            tensor<int32, [1]> var_948_axes_0 = const()[name = string("op_948_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119617984)))];
+            tensor<fp16, [768]> blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119619584)))];
+            fp16 var_938_to_fp16 = const()[name = string("op_938_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_948_cast_fp16 = layer_norm(axes = var_948_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_938_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast_fp16)[name = string("op_948_cast_fp16")];
+            tensor<fp16, [768, 768]> var_959_to_fp16 = const()[name = string("op_959_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119621184)))];
+            tensor<fp16, [768]> var_960_to_fp16 = const()[name = string("op_960_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120800896)))];
+            tensor<fp16, [1, 1500, 768]> linear_48_cast_fp16 = linear(bias = var_960_to_fp16, weight = var_959_to_fp16, x = var_948_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [768, 768]> var_963_to_fp16 = const()[name = string("op_963_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120802496)))];
+            tensor<fp16, [1, 1500, 768]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_963_to_fp16, x = var_948_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<fp16, [768, 768]> var_967_to_fp16 = const()[name = string("op_967_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121982208)))];
+            tensor<fp16, [768]> var_968_to_fp16 = const()[name = string("op_968_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123161920)))];
+            tensor<fp16, [1, 1500, 768]> linear_50_cast_fp16 = linear(bias = var_968_to_fp16, weight = var_967_to_fp16, x = var_948_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [4]> var_976 = const()[name = string("op_976"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_977_cast_fp16 = reshape(shape = var_976, x = linear_48_cast_fp16)[name = string("op_977_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_35_cast_fp16 = mul(x = var_977_cast_fp16, y = const_100_to_fp16)[name = string("q_35_cast_fp16")];
+            tensor<int32, [4]> var_983 = const()[name = string("op_983"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_984_cast_fp16 = reshape(shape = var_983, x = linear_49_cast_fp16)[name = string("op_984_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_35_cast_fp16 = mul(x = var_984_cast_fp16, y = const_101_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> var_990 = const()[name = string("op_990"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_991_cast_fp16 = reshape(shape = var_990, x = linear_50_cast_fp16)[name = string("op_991_cast_fp16")];
+            tensor<int32, [4]> var_992 = const()[name = string("op_992"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_65 = transpose(perm = transpose_65_perm_0, x = k_35_cast_fp16)[name = string("transpose_85")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_64 = transpose(perm = transpose_64_perm_0, x = q_35_cast_fp16)[name = string("transpose_86")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_996_cast_fp16 = softmax(axis = var_932, x = qk_17_cast_fp16)[name = string("op_996_cast_fp16")];
+            bool var_998_transpose_x_0 = const()[name = string("op_998_transpose_x_0"), val = bool(false)];
+            bool var_998_transpose_y_0 = const()[name = string("op_998_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_35_cast_fp16 = transpose(perm = var_992, x = var_991_cast_fp16)[name = string("transpose_87")];
+            tensor<fp16, [1, 12, 1500, 64]> var_998_cast_fp16 = matmul(transpose_x = var_998_transpose_x_0, transpose_y = var_998_transpose_y_0, x = var_996_cast_fp16, y = v_35_cast_fp16)[name = string("op_998_cast_fp16")];
+            tensor<int32, [4]> var_999 = const()[name = string("op_999"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1000_cast_fp16 = transpose(perm = var_999, x = var_998_cast_fp16)[name = string("transpose_84")];
+            tensor<fp16, [1, 1500, 768]> x_107_cast_fp16 = reshape(shape = concat_8, x = var_1000_cast_fp16)[name = string("x_107_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1004_to_fp16 = const()[name = string("op_1004_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123163520)))];
+            tensor<fp16, [768]> var_1005_to_fp16 = const()[name = string("op_1005_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124343232)))];
+            tensor<fp16, [1, 1500, 768]> linear_51_cast_fp16 = linear(bias = var_1005_to_fp16, weight = var_1004_to_fp16, x = x_107_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_109_cast_fp16 = add(x = x_103_cast_fp16, y = linear_51_cast_fp16)[name = string("x_109_cast_fp16")];
+            tensor<int32, [1]> var_1012_axes_0 = const()[name = string("op_1012_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124344832)))];
+            tensor<fp16, [768]> blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124346432)))];
+            tensor<fp16, [1, 1500, 768]> var_1012_cast_fp16 = layer_norm(axes = var_1012_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_938_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast_fp16)[name = string("op_1012_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_1021_to_fp16 = const()[name = string("op_1021_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124348032)))];
+            tensor<fp16, [3072]> var_1022_to_fp16 = const()[name = string("op_1022_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129066688)))];
+            tensor<fp16, [1, 1500, 3072]> linear_52_cast_fp16 = linear(bias = var_1022_to_fp16, weight = var_1021_to_fp16, x = var_1012_cast_fp16)[name = string("linear_52_cast_fp16")];
+            string x_113_mode_0 = const()[name = string("x_113_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_113_cast_fp16 = gelu(mode = x_113_mode_0, x = linear_52_cast_fp16)[name = string("x_113_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_1027_to_fp16 = const()[name = string("op_1027_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129072896)))];
+            tensor<fp16, [768]> var_1028_to_fp16 = const()[name = string("op_1028_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133791552)))];
+            tensor<fp16, [1, 1500, 768]> linear_53_cast_fp16 = linear(bias = var_1028_to_fp16, weight = var_1027_to_fp16, x = x_113_cast_fp16)[name = string("linear_53_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_115_cast_fp16 = add(x = x_109_cast_fp16, y = linear_53_cast_fp16)[name = string("x_115_cast_fp16")];
+            int32 var_1038 = const()[name = string("op_1038"), val = int32(-1)];
+            tensor<int32, [1]> var_1054_axes_0 = const()[name = string("op_1054_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133793152)))];
+            tensor<fp16, [768]> blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133794752)))];
+            fp16 var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_1054_cast_fp16 = layer_norm(axes = var_1054_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1044_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast_fp16)[name = string("op_1054_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1065_to_fp16 = const()[name = string("op_1065_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133796352)))];
+            tensor<fp16, [768]> var_1066_to_fp16 = const()[name = string("op_1066_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134976064)))];
+            tensor<fp16, [1, 1500, 768]> linear_54_cast_fp16 = linear(bias = var_1066_to_fp16, weight = var_1065_to_fp16, x = var_1054_cast_fp16)[name = string("linear_54_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1069_to_fp16 = const()[name = string("op_1069_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134977664)))];
+            tensor<fp16, [1, 1500, 768]> linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1069_to_fp16, x = var_1054_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1073_to_fp16 = const()[name = string("op_1073_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136157376)))];
+            tensor<fp16, [768]> var_1074_to_fp16 = const()[name = string("op_1074_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137337088)))];
+            tensor<fp16, [1, 1500, 768]> linear_56_cast_fp16 = linear(bias = var_1074_to_fp16, weight = var_1073_to_fp16, x = var_1054_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<int32, [4]> var_1082 = const()[name = string("op_1082"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1083_cast_fp16 = reshape(shape = var_1082, x = linear_54_cast_fp16)[name = string("op_1083_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_102_to_fp16 = const()[name = string("const_102_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_39_cast_fp16 = mul(x = var_1083_cast_fp16, y = const_102_to_fp16)[name = string("q_39_cast_fp16")];
+            tensor<int32, [4]> var_1089 = const()[name = string("op_1089"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1090_cast_fp16 = reshape(shape = var_1089, x = linear_55_cast_fp16)[name = string("op_1090_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_103_to_fp16 = const()[name = string("const_103_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_39_cast_fp16 = mul(x = var_1090_cast_fp16, y = const_103_to_fp16)[name = string("k_39_cast_fp16")];
+            tensor<int32, [4]> var_1096 = const()[name = string("op_1096"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1097_cast_fp16 = reshape(shape = var_1096, x = linear_56_cast_fp16)[name = string("op_1097_cast_fp16")];
+            tensor<int32, [4]> var_1098 = const()[name = string("op_1098"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_67 = transpose(perm = transpose_67_perm_0, x = k_39_cast_fp16)[name = string("transpose_81")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_66 = transpose(perm = transpose_66_perm_0, x = q_39_cast_fp16)[name = string("transpose_82")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_19_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1102_cast_fp16 = softmax(axis = var_1038, x = qk_19_cast_fp16)[name = string("op_1102_cast_fp16")];
+            bool var_1104_transpose_x_0 = const()[name = string("op_1104_transpose_x_0"), val = bool(false)];
+            bool var_1104_transpose_y_0 = const()[name = string("op_1104_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_39_cast_fp16 = transpose(perm = var_1098, x = var_1097_cast_fp16)[name = string("transpose_83")];
+            tensor<fp16, [1, 12, 1500, 64]> var_1104_cast_fp16 = matmul(transpose_x = var_1104_transpose_x_0, transpose_y = var_1104_transpose_y_0, x = var_1102_cast_fp16, y = v_39_cast_fp16)[name = string("op_1104_cast_fp16")];
+            tensor<int32, [4]> var_1105 = const()[name = string("op_1105"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_9 = const()[name = string("concat_9"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1106_cast_fp16 = transpose(perm = var_1105, x = var_1104_cast_fp16)[name = string("transpose_80")];
+            tensor<fp16, [1, 1500, 768]> x_119_cast_fp16 = reshape(shape = concat_9, x = var_1106_cast_fp16)[name = string("x_119_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1110_to_fp16 = const()[name = string("op_1110_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137338688)))];
+            tensor<fp16, [768]> var_1111_to_fp16 = const()[name = string("op_1111_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138518400)))];
+            tensor<fp16, [1, 1500, 768]> linear_57_cast_fp16 = linear(bias = var_1111_to_fp16, weight = var_1110_to_fp16, x = x_119_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_121_cast_fp16 = add(x = x_115_cast_fp16, y = linear_57_cast_fp16)[name = string("x_121_cast_fp16")];
+            tensor<int32, [1]> var_1118_axes_0 = const()[name = string("op_1118_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138520000)))];
+            tensor<fp16, [768]> blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138521600)))];
+            tensor<fp16, [1, 1500, 768]> var_1118_cast_fp16 = layer_norm(axes = var_1118_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1044_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast_fp16)[name = string("op_1118_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_1127_to_fp16 = const()[name = string("op_1127_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138523200)))];
+            tensor<fp16, [3072]> var_1128_to_fp16 = const()[name = string("op_1128_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143241856)))];
+            tensor<fp16, [1, 1500, 3072]> linear_58_cast_fp16 = linear(bias = var_1128_to_fp16, weight = var_1127_to_fp16, x = var_1118_cast_fp16)[name = string("linear_58_cast_fp16")];
+            string x_125_mode_0 = const()[name = string("x_125_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = linear_58_cast_fp16)[name = string("x_125_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_1133_to_fp16 = const()[name = string("op_1133_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143248064)))];
+            tensor<fp16, [768]> var_1134_to_fp16 = const()[name = string("op_1134_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147966720)))];
+            tensor<fp16, [1, 1500, 768]> linear_59_cast_fp16 = linear(bias = var_1134_to_fp16, weight = var_1133_to_fp16, x = x_125_cast_fp16)[name = string("linear_59_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_127_cast_fp16 = add(x = x_121_cast_fp16, y = linear_59_cast_fp16)[name = string("x_127_cast_fp16")];
+            int32 var_1144 = const()[name = string("op_1144"), val = int32(-1)];
+            tensor<int32, [1]> var_1160_axes_0 = const()[name = string("op_1160_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147968320)))];
+            tensor<fp16, [768]> blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147969920)))];
+            fp16 var_1150_to_fp16 = const()[name = string("op_1150_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_1160_cast_fp16 = layer_norm(axes = var_1160_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1150_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast_fp16)[name = string("op_1160_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1171_to_fp16 = const()[name = string("op_1171_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147971520)))];
+            tensor<fp16, [768]> var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149151232)))];
+            tensor<fp16, [1, 1500, 768]> linear_60_cast_fp16 = linear(bias = var_1172_to_fp16, weight = var_1171_to_fp16, x = var_1160_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1175_to_fp16 = const()[name = string("op_1175_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149152832)))];
+            tensor<fp16, [1, 1500, 768]> linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1175_to_fp16, x = var_1160_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1179_to_fp16 = const()[name = string("op_1179_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150332544)))];
+            tensor<fp16, [768]> var_1180_to_fp16 = const()[name = string("op_1180_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151512256)))];
+            tensor<fp16, [1, 1500, 768]> linear_62_cast_fp16 = linear(bias = var_1180_to_fp16, weight = var_1179_to_fp16, x = var_1160_cast_fp16)[name = string("linear_62_cast_fp16")];
+            tensor<int32, [4]> var_1188 = const()[name = string("op_1188"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1189_cast_fp16 = reshape(shape = var_1188, x = linear_60_cast_fp16)[name = string("op_1189_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_104_to_fp16 = const()[name = string("const_104_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_43_cast_fp16 = mul(x = var_1189_cast_fp16, y = const_104_to_fp16)[name = string("q_43_cast_fp16")];
+            tensor<int32, [4]> var_1195 = const()[name = string("op_1195"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1196_cast_fp16 = reshape(shape = var_1195, x = linear_61_cast_fp16)[name = string("op_1196_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_105_to_fp16 = const()[name = string("const_105_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_43_cast_fp16 = mul(x = var_1196_cast_fp16, y = const_105_to_fp16)[name = string("k_43_cast_fp16")];
+            tensor<int32, [4]> var_1202 = const()[name = string("op_1202"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1203_cast_fp16 = reshape(shape = var_1202, x = linear_62_cast_fp16)[name = string("op_1203_cast_fp16")];
+            tensor<int32, [4]> var_1204 = const()[name = string("op_1204"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_21_transpose_x_0 = const()[name = string("qk_21_transpose_x_0"), val = bool(false)];
+            bool qk_21_transpose_y_0 = const()[name = string("qk_21_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_69 = transpose(perm = transpose_69_perm_0, x = k_43_cast_fp16)[name = string("transpose_77")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_68 = transpose(perm = transpose_68_perm_0, x = q_43_cast_fp16)[name = string("transpose_78")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_21_cast_fp16 = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1208_cast_fp16 = softmax(axis = var_1144, x = qk_21_cast_fp16)[name = string("op_1208_cast_fp16")];
+            bool var_1210_transpose_x_0 = const()[name = string("op_1210_transpose_x_0"), val = bool(false)];
+            bool var_1210_transpose_y_0 = const()[name = string("op_1210_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_43_cast_fp16 = transpose(perm = var_1204, x = var_1203_cast_fp16)[name = string("transpose_79")];
+            tensor<fp16, [1, 12, 1500, 64]> var_1210_cast_fp16 = matmul(transpose_x = var_1210_transpose_x_0, transpose_y = var_1210_transpose_y_0, x = var_1208_cast_fp16, y = v_43_cast_fp16)[name = string("op_1210_cast_fp16")];
+            tensor<int32, [4]> var_1211 = const()[name = string("op_1211"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_10 = const()[name = string("concat_10"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1212_cast_fp16 = transpose(perm = var_1211, x = var_1210_cast_fp16)[name = string("transpose_76")];
+            tensor<fp16, [1, 1500, 768]> x_131_cast_fp16 = reshape(shape = concat_10, x = var_1212_cast_fp16)[name = string("x_131_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1216_to_fp16 = const()[name = string("op_1216_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151513856)))];
+            tensor<fp16, [768]> var_1217_to_fp16 = const()[name = string("op_1217_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152693568)))];
+            tensor<fp16, [1, 1500, 768]> linear_63_cast_fp16 = linear(bias = var_1217_to_fp16, weight = var_1216_to_fp16, x = x_131_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_133_cast_fp16 = add(x = x_127_cast_fp16, y = linear_63_cast_fp16)[name = string("x_133_cast_fp16")];
+            tensor<int32, [1]> var_1224_axes_0 = const()[name = string("op_1224_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152695168)))];
+            tensor<fp16, [768]> blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152696768)))];
+            tensor<fp16, [1, 1500, 768]> var_1224_cast_fp16 = layer_norm(axes = var_1224_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1150_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast_fp16)[name = string("op_1224_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_1233_to_fp16 = const()[name = string("op_1233_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152698368)))];
+            tensor<fp16, [3072]> var_1234_to_fp16 = const()[name = string("op_1234_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157417024)))];
+            tensor<fp16, [1, 1500, 3072]> linear_64_cast_fp16 = linear(bias = var_1234_to_fp16, weight = var_1233_to_fp16, x = var_1224_cast_fp16)[name = string("linear_64_cast_fp16")];
+            string x_137_mode_0 = const()[name = string("x_137_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_137_cast_fp16 = gelu(mode = x_137_mode_0, x = linear_64_cast_fp16)[name = string("x_137_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_1239_to_fp16 = const()[name = string("op_1239_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157423232)))];
+            tensor<fp16, [768]> var_1240_to_fp16 = const()[name = string("op_1240_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162141888)))];
+            tensor<fp16, [1, 1500, 768]> linear_65_cast_fp16 = linear(bias = var_1240_to_fp16, weight = var_1239_to_fp16, x = x_137_cast_fp16)[name = string("linear_65_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_139_cast_fp16 = add(x = x_133_cast_fp16, y = linear_65_cast_fp16)[name = string("x_139_cast_fp16")];
+            int32 var_1250 = const()[name = string("op_1250"), val = int32(-1)];
+            tensor<int32, [1]> var_1266_axes_0 = const()[name = string("op_1266_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162143488)))];
+            tensor<fp16, [768]> blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162145088)))];
+            fp16 var_1256_to_fp16 = const()[name = string("op_1256_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> var_1266_cast_fp16 = layer_norm(axes = var_1266_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1256_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast_fp16)[name = string("op_1266_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1277_to_fp16 = const()[name = string("op_1277_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162146688)))];
+            tensor<fp16, [768]> var_1278_to_fp16 = const()[name = string("op_1278_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163326400)))];
+            tensor<fp16, [1, 1500, 768]> linear_66_cast_fp16 = linear(bias = var_1278_to_fp16, weight = var_1277_to_fp16, x = var_1266_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1281_to_fp16 = const()[name = string("op_1281_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163328000)))];
+            tensor<fp16, [1, 1500, 768]> linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1281_to_fp16, x = var_1266_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1285_to_fp16 = const()[name = string("op_1285_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164507712)))];
+            tensor<fp16, [768]> var_1286_to_fp16 = const()[name = string("op_1286_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165687424)))];
+            tensor<fp16, [1, 1500, 768]> linear_68_cast_fp16 = linear(bias = var_1286_to_fp16, weight = var_1285_to_fp16, x = var_1266_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [4]> var_1294 = const()[name = string("op_1294"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1295_cast_fp16 = reshape(shape = var_1294, x = linear_66_cast_fp16)[name = string("op_1295_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_106_to_fp16 = const()[name = string("const_106_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> q_cast_fp16 = mul(x = var_1295_cast_fp16, y = const_106_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_1301 = const()[name = string("op_1301"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1302_cast_fp16 = reshape(shape = var_1301, x = linear_67_cast_fp16)[name = string("op_1302_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_107_to_fp16 = const()[name = string("const_107_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 12, 64]> k_cast_fp16 = mul(x = var_1302_cast_fp16, y = const_107_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_1308 = const()[name = string("op_1308"), val = tensor<int32, [4]>([1, 1500, 12, -1])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1309_cast_fp16 = reshape(shape = var_1308, x = linear_68_cast_fp16)[name = string("op_1309_cast_fp16")];
+            tensor<int32, [4]> var_1310 = const()[name = string("op_1310"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 1500]> transpose_71 = transpose(perm = transpose_71_perm_0, x = k_cast_fp16)[name = string("transpose_73")];
+            tensor<fp16, [1, 12, 1500, 64]> transpose_70 = transpose(perm = transpose_70_perm_0, x = q_cast_fp16)[name = string("transpose_74")];
+            tensor<fp16, [1, 12, 1500, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1314_cast_fp16 = softmax(axis = var_1250, x = qk_cast_fp16)[name = string("op_1314_cast_fp16")];
+            bool var_1316_transpose_x_0 = const()[name = string("op_1316_transpose_x_0"), val = bool(false)];
+            bool var_1316_transpose_y_0 = const()[name = string("op_1316_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 64]> v_cast_fp16 = transpose(perm = var_1310, x = var_1309_cast_fp16)[name = string("transpose_75")];
+            tensor<fp16, [1, 12, 1500, 64]> var_1316_cast_fp16 = matmul(transpose_x = var_1316_transpose_x_0, transpose_y = var_1316_transpose_y_0, x = var_1314_cast_fp16, y = v_cast_fp16)[name = string("op_1316_cast_fp16")];
+            tensor<int32, [4]> var_1317 = const()[name = string("op_1317"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [3]>([1, 1500, 768])];
+            tensor<fp16, [1, 1500, 12, 64]> var_1318_cast_fp16 = transpose(perm = var_1317, x = var_1316_cast_fp16)[name = string("transpose_72")];
+            tensor<fp16, [1, 1500, 768]> x_143_cast_fp16 = reshape(shape = concat_11, x = var_1318_cast_fp16)[name = string("x_143_cast_fp16")];
+            tensor<fp16, [768, 768]> var_1322_to_fp16 = const()[name = string("op_1322_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165689024)))];
+            tensor<fp16, [768]> var_1323_to_fp16 = const()[name = string("op_1323_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166868736)))];
+            tensor<fp16, [1, 1500, 768]> linear_69_cast_fp16 = linear(bias = var_1323_to_fp16, weight = var_1322_to_fp16, x = x_143_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_145_cast_fp16 = add(x = x_139_cast_fp16, y = linear_69_cast_fp16)[name = string("x_145_cast_fp16")];
+            tensor<int32, [1]> var_1330_axes_0 = const()[name = string("op_1330_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166870336)))];
+            tensor<fp16, [768]> blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166871936)))];
+            tensor<fp16, [1, 1500, 768]> var_1330_cast_fp16 = layer_norm(axes = var_1330_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1256_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast_fp16)[name = string("op_1330_cast_fp16")];
+            tensor<fp16, [3072, 768]> var_1339_to_fp16 = const()[name = string("op_1339_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166873536)))];
+            tensor<fp16, [3072]> var_1340_to_fp16 = const()[name = string("op_1340_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171592192)))];
+            tensor<fp16, [1, 1500, 3072]> linear_70_cast_fp16 = linear(bias = var_1340_to_fp16, weight = var_1339_to_fp16, x = var_1330_cast_fp16)[name = string("linear_70_cast_fp16")];
+            string x_149_mode_0 = const()[name = string("x_149_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 3072]> x_149_cast_fp16 = gelu(mode = x_149_mode_0, x = linear_70_cast_fp16)[name = string("x_149_cast_fp16")];
+            tensor<fp16, [768, 3072]> var_1345_to_fp16 = const()[name = string("op_1345_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171598400)))];
+            tensor<fp16, [768]> var_1346_to_fp16 = const()[name = string("op_1346_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176317056)))];
+            tensor<fp16, [1, 1500, 768]> linear_71_cast_fp16 = linear(bias = var_1346_to_fp16, weight = var_1345_to_fp16, x = x_149_cast_fp16)[name = string("linear_71_cast_fp16")];
+            tensor<fp16, [1, 1500, 768]> x_cast_fp16 = add(x = x_145_cast_fp16, y = linear_71_cast_fp16)[name = string("x_cast_fp16")];
+            tensor<int32, [1]> var_1359_axes_0 = const()[name = string("op_1359_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176318656)))];
+            tensor<fp16, [768]> ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176320256)))];
+            fp16 var_1350_to_fp16 = const()[name = string("op_1350_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 768]> output = layer_norm(axes = var_1359_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_1350_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_1359_cast_fp16")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/small/encoder.mlmodelc/weights/weight.bin b/small/encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..af87c7443a8aec92126d4359f862e98ecdcce744
--- /dev/null
+++ b/small/encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d3ab676977d57b06993ee7ebc638fc8568a99ddb11cb7a445328ce50fbd8b36
+size 176321856
diff --git a/small/model_dims.json b/small/model_dims.json
new file mode 100644
index 0000000000000000000000000000000000000000..f2c6f3bbc78ba2e4b17edea0fe4b151ef7a091a5
--- /dev/null
+++ b/small/model_dims.json
@@ -0,0 +1,12 @@
+{
+  "n_mels": 80,
+  "n_audio_ctx": 1500,
+  "n_audio_state": 768,
+  "n_audio_head": 12,
+  "n_audio_layer": 12,
+  "n_vocab": 51865,
+  "n_text_ctx": 448,
+  "n_text_state": 768,
+  "n_text_head": 12,
+  "n_text_layer": 12
+}
\ No newline at end of file
diff --git a/tiny/decoder_first.mlmodelc/analytics/coremldata.bin b/tiny/decoder_first.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a9e0451492e27debd13b9046778f57690e53b2c0
--- /dev/null
+++ b/tiny/decoder_first.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edbd82796122288fc8db28d43a2b33ea5d8e40f8dfe5f67bb51810d9e15cfd9a
+size 243
diff --git a/tiny/decoder_first.mlmodelc/coremldata.bin b/tiny/decoder_first.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..764b6b14959580bbb96df3603635a8995b081f27
--- /dev/null
+++ b/tiny/decoder_first.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de85178dbee1768815281dad14b531ee364fb91c8ea89d70889ca8aabae34d70
+size 453
diff --git a/tiny/decoder_first.mlmodelc/metadata.json b/tiny/decoder_first.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..6cf5bdcc3ff0beb4bc8eea0d52f4153be48b1d9e
--- /dev/null
+++ b/tiny/decoder_first.mlmodelc/metadata.json
@@ -0,0 +1,106 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "dummy",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.writeState" : 10,
+      "Shape" : 8,
+      "Ios18.linear" : 8,
+      "Identity" : 1,
+      "Ios18.gather" : 8,
+      "Ios18.concat" : 8,
+      "Ios18.sliceUpdate" : 10,
+      "Ios18.cast" : 16,
+      "Ios18.expandDims" : 8,
+      "Ios18.readState" : 10
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1 × 448 × 384)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 448, 384]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1 × 448 × 384)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 448, 384]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1 × 1500 × 384)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 1500, 384]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1 × 1500 × 384)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 1500, 384]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...1500 × 384",
+        "shapeRange" : "[[1, 1], [1, 1500], [384, 384]]",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 384)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1, 384]",
+        "name" : "audio_data",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_first",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/tiny/decoder_first.mlmodelc/model.mil b/tiny/decoder_first.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..9706e505fecf468b98b87c5c9b8e2e88c31f32db
--- /dev/null
+++ b/tiny/decoder_first.mlmodelc/model.mil
@@ -0,0 +1,255 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, ?, 384]> audio_data, state<tensor<fp16, [4, 1, 448, 384]>> k_cache1, state<tensor<fp16, [4, 1, 1500, 384]>> k_cache2, state<tensor<fp16, [4, 1, 448, 384]>> v_cache1, state<tensor<fp16, [4, 1, 1500, 384]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 384]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [384, 384]]}})))] {
+            tensor<fp16, [1, ?, 384]> dummy = identity(x = audio_data)[name = string("identity_0")];
+            tensor<fp16, [4, 1, 448, 384]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor<fp16, [4, 1, 448, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [4, 1, 448, 384]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_10_write_state")];
+            tensor<fp16, [4, 1, 448, 384]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_11_write_state")];
+            tensor<fp16, [4, 1, 1500, 384]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<fp16, [4, 1, 1500, 384]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<fp16, [384, 384]> var_75_to_fp16 = const()[name = string("op_75_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1376384)))];
+            tensor<fp16, [384]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1671360)))];
+            tensor<fp16, [1, ?, 384]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_75_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [384, 384]> var_79_to_fp16 = const()[name = string("op_79_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1672192)))];
+            tensor<fp16, [384]> var_80_to_fp16 = const()[name = string("op_80_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1967168)))];
+            tensor<fp16, [1, ?, 384]> linear_1_cast_fp16 = linear(bias = var_80_to_fp16, weight = var_79_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [3]> var_82_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_82_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_82_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_82_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [3]> var_82_shape_cast_fp16_to_int16 = cast(dtype = var_82_shape_cast_fp16_to_int16_dtype_0, x = var_82_shape_cast_fp16)[name = string("cast_31")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_82_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_30")];
+            tensor<int32, [1]> expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")];
+            tensor<int32, [4]> concat_5 = const()[name = string("concat_5"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
+            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 1500, 384]> k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_12_write_state")];
+            tensor<fp16, [4, 1, 1500, 384]> coreml_update_state_12 = read_state(input = k_cache2)[name = string("coreml_update_state_12")];
+            tensor<int32, [3]> var_87_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_87_shape_cast_fp16")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_87_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_87_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_87_shape_cast_fp16_to_uint16 = cast(dtype = var_87_shape_cast_fp16_to_uint16_dtype_0, x = var_87_shape_cast_fp16)[name = string("cast_29")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_87_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_28")];
+            tensor<int32, [1]> expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")];
+            tensor<int32, [4]> concat_8 = const()[name = string("concat_8"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [1]> concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)];
+            bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 1500, 384]> v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_13_write_state")];
+            tensor<fp16, [4, 1, 1500, 384]> coreml_update_state_13 = read_state(input = v_cache2)[name = string("coreml_update_state_13")];
+            tensor<fp16, [384, 384]> var_109_to_fp16 = const()[name = string("op_109_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1968000)))];
+            tensor<fp16, [1, ?, 384]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_109_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")];
+            tensor<fp16, [384, 384]> var_113_to_fp16 = const()[name = string("op_113_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2262976)))];
+            tensor<fp16, [384]> var_114_to_fp16 = const()[name = string("op_114_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2557952)))];
+            tensor<fp16, [1, ?, 384]> linear_3_cast_fp16 = linear(bias = var_114_to_fp16, weight = var_113_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")];
+            tensor<int32, [3]> var_116_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_116_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_116_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_116_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_116_shape_cast_fp16_to_uint16 = cast(dtype = var_116_shape_cast_fp16_to_uint16_dtype_0, x = var_116_shape_cast_fp16)[name = string("cast_27")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_116_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_26")];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")];
+            tensor<int32, [4]> concat_11 = const()[name = string("concat_11"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)];
+            bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 1500, 384]> k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_12)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_14_write_state")];
+            tensor<fp16, [4, 1, 1500, 384]> coreml_update_state_14 = read_state(input = k_cache2)[name = string("coreml_update_state_14")];
+            tensor<int32, [3]> var_121_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_121_shape_cast_fp16")];
+            int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)];
+            int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)];
+            bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)];
+            string var_121_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_121_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_121_shape_cast_fp16_to_uint16 = cast(dtype = var_121_shape_cast_fp16_to_uint16_dtype_0, x = var_121_shape_cast_fp16)[name = string("cast_25")];
+            uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_121_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")];
+            string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_24")];
+            tensor<int32, [1]> expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")];
+            tensor<int32, [4]> concat_14 = const()[name = string("concat_14"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [1]> concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 1500, 384]> v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_13)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_15_write_state")];
+            tensor<fp16, [4, 1, 1500, 384]> coreml_update_state_15 = read_state(input = v_cache2)[name = string("coreml_update_state_15")];
+            tensor<fp16, [384, 384]> var_143_to_fp16 = const()[name = string("op_143_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2558784)))];
+            tensor<fp16, [1, ?, 384]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_143_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")];
+            tensor<fp16, [384, 384]> var_147_to_fp16 = const()[name = string("op_147_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2853760)))];
+            tensor<fp16, [384]> var_148_to_fp16 = const()[name = string("op_148_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3148736)))];
+            tensor<fp16, [1, ?, 384]> linear_5_cast_fp16 = linear(bias = var_148_to_fp16, weight = var_147_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")];
+            tensor<int32, [3]> var_150_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_150_shape_cast_fp16")];
+            int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)];
+            int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)];
+            bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)];
+            string var_150_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_150_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_150_shape_cast_fp16_to_uint16 = cast(dtype = var_150_shape_cast_fp16_to_uint16_dtype_0, x = var_150_shape_cast_fp16)[name = string("cast_23")];
+            uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_150_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")];
+            string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_22")];
+            tensor<int32, [1]> expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")];
+            tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)];
+            bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 1500, 384]> k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_14)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_16_write_state")];
+            tensor<fp16, [4, 1, 1500, 384]> coreml_update_state_16 = read_state(input = k_cache2)[name = string("coreml_update_state_16")];
+            tensor<int32, [3]> var_155_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_155_shape_cast_fp16")];
+            int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)];
+            int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)];
+            bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)];
+            string var_155_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_155_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_155_shape_cast_fp16_to_uint16 = cast(dtype = var_155_shape_cast_fp16_to_uint16_dtype_0, x = var_155_shape_cast_fp16)[name = string("cast_21")];
+            uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_155_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")];
+            string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_20")];
+            tensor<int32, [1]> expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")];
+            tensor<int32, [4]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [1]> concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)];
+            bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 1500, 384]> v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_15)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_17_write_state")];
+            tensor<fp16, [4, 1, 1500, 384]> coreml_update_state_17 = read_state(input = v_cache2)[name = string("coreml_update_state_17")];
+            tensor<fp16, [384, 384]> var_177_to_fp16 = const()[name = string("op_177_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3149568)))];
+            tensor<fp16, [1, ?, 384]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_177_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [384, 384]> var_181_to_fp16 = const()[name = string("op_181_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3444544)))];
+            tensor<fp16, [384]> var_182_to_fp16 = const()[name = string("op_182_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3739520)))];
+            tensor<fp16, [1, ?, 384]> linear_7_cast_fp16 = linear(bias = var_182_to_fp16, weight = var_181_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")];
+            tensor<int32, [3]> var_184_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_184_shape_cast_fp16")];
+            int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)];
+            int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)];
+            bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)];
+            string var_184_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_184_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_184_shape_cast_fp16_to_uint16 = cast(dtype = var_184_shape_cast_fp16_to_uint16_dtype_0, x = var_184_shape_cast_fp16)[name = string("cast_19")];
+            uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_184_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")];
+            string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_18")];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")];
+            tensor<int32, [4]> concat_23 = const()[name = string("concat_23"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)];
+            bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")];
+            tensor<int32, [4]> k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 1500, 384]> k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_16)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_18_write_state")];
+            tensor<int32, [3]> var_189_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_189_shape_cast_fp16")];
+            int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)];
+            int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)];
+            bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)];
+            string var_189_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_189_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_189_shape_cast_fp16_to_uint16 = cast(dtype = var_189_shape_cast_fp16_to_uint16_dtype_0, x = var_189_shape_cast_fp16)[name = string("cast_17")];
+            uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_189_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")];
+            string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [1]> expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor<int32, [1]>([0])];
+            int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_16")];
+            tensor<int32, [1]> expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")];
+            tensor<int32, [4]> concat_26 = const()[name = string("concat_26"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 1500, 384]> v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_17)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_19_write_state")];
+        } -> (dummy);
+}
\ No newline at end of file
diff --git a/tiny/decoder_first.mlmodelc/weights/weight.bin b/tiny/decoder_first.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..de64c07fd917e8302af1c8586935350e5cc83994
--- /dev/null
+++ b/tiny/decoder_first.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0192b9200433d5cec5b0cdc705d745617174b5b3c1d833a84ae5cd3e07492f96
+size 3740352
diff --git a/tiny/decoder_second.mlmodelc/analytics/coremldata.bin b/tiny/decoder_second.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5414acfe5c5e402939504d6124b12ef69958c316
--- /dev/null
+++ b/tiny/decoder_second.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:661a8ad472d4f0b68e3a47c4b7a594f40b4b67df525bed3fd6d7de55823f521d
+size 243
diff --git a/tiny/decoder_second.mlmodelc/coremldata.bin b/tiny/decoder_second.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b0c579a261da3e7a026491f00e6cd78364fc0653
--- /dev/null
+++ b/tiny/decoder_second.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e8da6d1eec33635f6e3d6e0c32263ce9b070fc43e0b5a19355951c654eb3e66
+size 487
diff --git a/tiny/decoder_second.mlmodelc/metadata.json b/tiny/decoder_second.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..3637eae4f3db4041e6f4e232265ff46d19e51b6d
--- /dev/null
+++ b/tiny/decoder_second.mlmodelc/metadata.json
@@ -0,0 +1,127 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.linear" : 33,
+      "Ios18.readState" : 10,
+      "Ios18.expandDims" : 5,
+      "Ios18.sub" : 1,
+      "Ios18.matmul" : 16,
+      "Ios18.gelu" : 4,
+      "Ios18.gather" : 7,
+      "Ios18.concat" : 22,
+      "Shape" : 6,
+      "Ios18.add" : 21,
+      "Ios18.sliceUpdate" : 16,
+      "Ios18.sliceByIndex" : 33,
+      "Ios18.layerNorm" : 13,
+      "Ios18.cast" : 12,
+      "Ios18.transpose" : 32,
+      "Ios18.writeState" : 8,
+      "Ios18.reshape" : 32,
+      "Ios18.softmax" : 8,
+      "Ios18.mul" : 16
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1 × 448 × 384)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 448, 384]",
+        "name" : "k_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1 × 448 × 384)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 448, 384]",
+        "name" : "v_cache1",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1 × 1500 × 384)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 1500, 384]",
+        "name" : "k_cache2",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1 × 1500 × 384)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 1500, 384]",
+        "name" : "v_cache2",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Int32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Int32 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "token_data",
+        "shortDescription" : ""
+      },
+      {
+        "dataType" : "Float16",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1...448",
+        "shapeRange" : "[[1, 1], [1, 448]]",
+        "formattedType" : "MultiArray (Float16 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "offset_mask",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "decoder_second",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/tiny/decoder_second.mlmodelc/model.mil b/tiny/decoder_second.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..516a5a7a25dc63e55d651108a0d5f6aaa944c3af
--- /dev/null
+++ b/tiny/decoder_second.mlmodelc/model.mil
@@ -0,0 +1,838 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(state<tensor<fp16, [4, 1, 448, 384]>> k_cache1, state<tensor<fp16, [4, 1, 1500, 384]>> k_cache2, tensor<fp16, [1, ?]> offset_mask, tensor<int32, [1, ?]> token_data, state<tensor<fp16, [4, 1, 448, 384]>> v_cache1, state<tensor<fp16, [4, 1, 1500, 384]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] {
+            tensor<int32, [2]> var_22_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_22_shape_cast_fp16")];
+            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
+            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
+            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
+            string var_22_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_22_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
+            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
+            tensor<int16, [2]> var_22_shape_cast_fp16_to_int16 = cast(dtype = var_22_shape_cast_fp16_to_int16_dtype_0, x = var_22_shape_cast_fp16)[name = string("cast_58")];
+            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_22_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
+            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            tensor<int32, [2]> var_26_shape = shape(x = token_data)[name = string("op_26_shape")];
+            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
+            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
+            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
+            string var_26_shape_to_uint16_dtype_0 = const()[name = string("op_26_shape_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
+            tensor<uint16, [2]> var_26_shape_to_uint16 = cast(dtype = var_26_shape_to_uint16_dtype_0, x = var_26_shape)[name = string("cast_56")];
+            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_26_shape_to_uint16)[name = string("gather_1_cast_uint16")];
+            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_55")];
+            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_57")];
+            int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")];
+            int32 var_42_axis_0 = const()[name = string("op_42_axis_0"), val = int32(0)];
+            int32 var_42_batch_dims_0 = const()[name = string("op_42_batch_dims_0"), val = int32(0)];
+            bool var_42_validate_indices_0 = const()[name = string("op_42_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [51865, 384]> token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor<fp16, [51865, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, ?, 384]> var_42_cast_fp16 = gather(axis = var_42_axis_0, batch_dims = var_42_batch_dims_0, indices = token_data, validate_indices = var_42_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_42_cast_fp16")];
+            int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)];
+            int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)];
+            bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")];
+            int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(384)];
+            int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)];
+            bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")];
+            tensor<bool, [2]> var_45_end_mask_0 = const()[name = string("op_45_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 384]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [448, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39832448)))];
+            tensor<fp16, [?, ?]> var_45_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_45_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_45_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_3_cast_fp16 = add(x = var_42_cast_fp16, y = var_45_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<fp16, [4, 1, 448, 384]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
+            tensor<int32, [4]> k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 384])];
+            tensor<bool, [4]> k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 384]> k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")];
+            tensor<fp16, [4, 1, 448, 384]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
+            tensor<int32, [4]> v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 384])];
+            tensor<bool, [4]> v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 384]> v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")];
+            tensor<fp16, [4, 1, 1500, 384]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
+            tensor<int32, [4]> k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 384])];
+            tensor<bool, [4]> k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 384]> k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")];
+            tensor<fp16, [4, 1, 1500, 384]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
+            tensor<int32, [4]> v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 384])];
+            tensor<bool, [4]> v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 384]> v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")];
+            int32 var_67 = const()[name = string("op_67"), val = int32(-1)];
+            tensor<int32, [1]> var_85_axes_0 = const()[name = string("op_85_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40176576)))];
+            tensor<fp16, [384]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40177408)))];
+            fp16 var_73_to_fp16 = const()[name = string("op_73_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 384]> var_85_cast_fp16 = layer_norm(axes = var_85_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_85_cast_fp16")];
+            tensor<fp16, [384, 384]> var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40178240)))];
+            tensor<fp16, [384]> var_97_to_fp16 = const()[name = string("op_97_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40473216)))];
+            tensor<fp16, [1, ?, 384]> linear_0_cast_fp16 = linear(bias = var_97_to_fp16, weight = var_96_to_fp16, x = var_85_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [384, 384]> var_100_to_fp16 = const()[name = string("op_100_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40474048)))];
+            tensor<fp16, [384]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40769024)))];
+            tensor<fp16, [1, ?, 384]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_100_to_fp16, x = var_85_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [384, 384]> var_104_to_fp16 = const()[name = string("op_104_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40769856)))];
+            tensor<fp16, [384]> var_105_to_fp16 = const()[name = string("op_105_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41064832)))];
+            tensor<fp16, [1, ?, 384]> linear_2_cast_fp16 = linear(bias = var_105_to_fp16, weight = var_104_to_fp16, x = var_85_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [3]> var_107_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_107_shape_cast_fp16")];
+            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
+            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
+            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
+            string var_107_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_107_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_107_shape_cast_fp16_to_uint16 = cast(dtype = var_107_shape_cast_fp16_to_uint16_dtype_0, x = var_107_shape_cast_fp16)[name = string("cast_54")];
+            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_107_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
+            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_53")];
+            int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")];
+            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")];
+            tensor<int32, [1]> expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")];
+            tensor<int32, [1]> concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)];
+            bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")];
+            tensor<int32, [1]> concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)];
+            bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_8_write_state")];
+            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_8 = read_state(input = k_cache1)[name = string("coreml_update_state_8")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_9_write_state")];
+            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_9 = read_state(input = v_cache1)[name = string("coreml_update_state_9")];
+            int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)];
+            int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(384)];
+            int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
+            bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")];
+            tensor<int32, [3]> var_123_begin_0 = const()[name = string("op_123_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_123_end_mask_0 = const()[name = string("op_123_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 384]> var_123_cast_fp16 = slice_by_index(begin = var_123_begin_0, end = concat_10, end_mask = var_123_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_123_cast_fp16")];
+            tensor<int32, [3]> var_126_begin_0 = const()[name = string("op_126_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_126_end_mask_0 = const()[name = string("op_126_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 384]> var_126_cast_fp16 = slice_by_index(begin = var_126_begin_0, end = concat_10, end_mask = var_126_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_126_cast_fp16")];
+            tensor<int32, [4]> concat_12x = const()[name = string("concat_12x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_136_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_136_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_20_to_fp16 = const()[name = string("const_20_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> q_3_cast_fp16 = mul(x = var_136_cast_fp16, y = const_20_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> concat_13x = const()[name = string("concat_13x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_143_cast_fp16 = reshape(shape = concat_13x, x = var_123_cast_fp16)[name = string("op_143_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_21_to_fp16 = const()[name = string("const_21_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> k_5_cast_fp16 = mul(x = var_143_cast_fp16, y = const_21_to_fp16)[name = string("k_5_cast_fp16")];
+            tensor<int32, [4]> concat_14x = const()[name = string("concat_14x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_150_cast_fp16 = reshape(shape = concat_14x, x = var_126_cast_fp16)[name = string("op_150_cast_fp16")];
+            tensor<int32, [4]> var_151 = const()[name = string("op_151"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, ?]> transpose_34 = transpose(perm = transpose_34_perm_0, x = k_5_cast_fp16)[name = string("transpose_78")];
+            tensor<fp16, [1, 6, ?, 64]> transpose_33 = transpose(perm = transpose_33_perm_0, x = q_3_cast_fp16)[name = string("transpose_79")];
+            tensor<fp16, [1, 6, ?, ?]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_33, y = transpose_34)[name = string("qk_1_cast_fp16")];
+            int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)];
+            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
+            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")];
+            tensor<int32, [2]> var_154_begin_0 = const()[name = string("op_154_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_154_end_mask_0 = const()[name = string("op_154_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [448, 448]> mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor<fp16, [448, 448]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41065664)))];
+            tensor<fp16, [?, 448]> var_154_cast_fp16 = slice_by_index(begin = var_154_begin_0, end = concat_15, end_mask = var_154_end_mask_0, x = mask_to_fp16)[name = string("op_154_cast_fp16")];
+            int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)];
+            int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)];
+            bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")];
+            tensor<int32, [2]> var_155_begin_0 = const()[name = string("op_155_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_155_end_mask_0 = const()[name = string("op_155_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = concat_16, end_mask = var_155_end_mask_0, x = var_154_cast_fp16)[name = string("op_155_cast_fp16")];
+            tensor<fp16, [1, 6, ?, ?]> qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_155_cast_fp16)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 6, ?, ?]> var_158_cast_fp16 = softmax(axis = var_67, x = qk_3_cast_fp16)[name = string("op_158_cast_fp16")];
+            bool var_160_transpose_x_0 = const()[name = string("op_160_transpose_x_0"), val = bool(false)];
+            bool var_160_transpose_y_0 = const()[name = string("op_160_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, ?, 64]> v_5_cast_fp16 = transpose(perm = var_151, x = var_150_cast_fp16)[name = string("transpose_80")];
+            tensor<fp16, [1, 6, ?, 64]> var_160_cast_fp16 = matmul(transpose_x = var_160_transpose_x_0, transpose_y = var_160_transpose_y_0, x = var_158_cast_fp16, y = v_5_cast_fp16)[name = string("op_160_cast_fp16")];
+            tensor<int32, [4]> var_161 = const()[name = string("op_161"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_17x = const()[name = string("concat_17x"), val = tensor<int32, [3]>([1, -1, 384])];
+            tensor<fp16, [1, ?, 6, 64]> var_162_cast_fp16 = transpose(perm = var_161, x = var_160_cast_fp16)[name = string("transpose_77")];
+            tensor<fp16, [1, ?, 384]> x_7_cast_fp16 = reshape(shape = concat_17x, x = var_162_cast_fp16)[name = string("x_7_cast_fp16")];
+            tensor<fp16, [384, 384]> var_166_to_fp16 = const()[name = string("op_166_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41467136)))];
+            tensor<fp16, [384]> var_167_to_fp16 = const()[name = string("op_167_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41762112)))];
+            tensor<fp16, [1, ?, 384]> linear_3_cast_fp16 = linear(bias = var_167_to_fp16, weight = var_166_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")];
+            tensor<int32, [1]> var_174_axes_0 = const()[name = string("op_174_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41762944)))];
+            tensor<fp16, [384]> blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41763776)))];
+            tensor<fp16, [1, ?, 384]> var_174_cast_fp16 = layer_norm(axes = var_174_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_174_cast_fp16")];
+            tensor<fp16, [384, 384]> var_183_to_fp16 = const()[name = string("op_183_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41764608)))];
+            tensor<fp16, [384]> var_184_to_fp16 = const()[name = string("op_184_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42059584)))];
+            tensor<fp16, [1, ?, 384]> linear_4_cast_fp16 = linear(bias = var_184_to_fp16, weight = var_183_to_fp16, x = var_174_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 384]> k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor<fp16, [1, 1500, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42060416)))];
+            tensor<fp16, [1, 1500, 384]> k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 384]> v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_22x = const()[name = string("concat_22x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_204_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_204_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_22_to_fp16 = const()[name = string("const_22_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> q_7_cast_fp16 = mul(x = var_204_cast_fp16, y = const_22_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_210 = const()[name = string("op_210"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_211_cast_fp16 = reshape(shape = var_210, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_211_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_23_to_fp16 = const()[name = string("const_23_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> k_9_cast_fp16 = mul(x = var_211_cast_fp16, y = const_23_to_fp16)[name = string("k_9_cast_fp16")];
+            tensor<int32, [4]> var_217 = const()[name = string("op_217"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_218_cast_fp16 = reshape(shape = var_217, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_218_cast_fp16")];
+            tensor<int32, [4]> var_219 = const()[name = string("op_219"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, 1500]> transpose_36 = transpose(perm = transpose_36_perm_0, x = k_9_cast_fp16)[name = string("transpose_74")];
+            tensor<fp16, [1, 6, ?, 64]> transpose_35 = transpose(perm = transpose_35_perm_0, x = q_7_cast_fp16)[name = string("transpose_75")];
+            tensor<fp16, [1, 6, ?, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_35, y = transpose_36)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 6, ?, 1500]> var_223_cast_fp16 = softmax(axis = var_67, x = qk_5_cast_fp16)[name = string("op_223_cast_fp16")];
+            bool var_225_transpose_x_0 = const()[name = string("op_225_transpose_x_0"), val = bool(false)];
+            bool var_225_transpose_y_0 = const()[name = string("op_225_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, 1500, 64]> v_9_cast_fp16 = transpose(perm = var_219, x = var_218_cast_fp16)[name = string("transpose_76")];
+            tensor<fp16, [1, 6, ?, 64]> var_225_cast_fp16 = matmul(transpose_x = var_225_transpose_x_0, transpose_y = var_225_transpose_y_0, x = var_223_cast_fp16, y = v_9_cast_fp16)[name = string("op_225_cast_fp16")];
+            tensor<int32, [4]> var_226 = const()[name = string("op_226"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_23x = const()[name = string("concat_23x"), val = tensor<int32, [3]>([1, -1, 384])];
+            tensor<fp16, [1, ?, 6, 64]> var_227_cast_fp16 = transpose(perm = var_226, x = var_225_cast_fp16)[name = string("transpose_73")];
+            tensor<fp16, [1, ?, 384]> x_13_cast_fp16 = reshape(shape = concat_23x, x = var_227_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<fp16, [384, 384]> var_231_to_fp16 = const()[name = string("op_231_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43212480)))];
+            tensor<fp16, [384]> var_232_to_fp16 = const()[name = string("op_232_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43507456)))];
+            tensor<fp16, [1, ?, 384]> linear_5_cast_fp16 = linear(bias = var_232_to_fp16, weight = var_231_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")];
+            tensor<int32, [1]> var_239_axes_0 = const()[name = string("op_239_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43508288)))];
+            tensor<fp16, [384]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43509120)))];
+            tensor<fp16, [1, ?, 384]> var_239_cast_fp16 = layer_norm(axes = var_239_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_239_cast_fp16")];
+            tensor<fp16, [1536, 384]> var_248_to_fp16 = const()[name = string("op_248_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43509952)))];
+            tensor<fp16, [1536]> var_249_to_fp16 = const()[name = string("op_249_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44689664)))];
+            tensor<fp16, [1, ?, 1536]> linear_6_cast_fp16 = linear(bias = var_249_to_fp16, weight = var_248_to_fp16, x = var_239_cast_fp16)[name = string("linear_6_cast_fp16")];
+            string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 1536]> x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")];
+            tensor<fp16, [384, 1536]> var_254_to_fp16 = const()[name = string("op_254_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44692800)))];
+            tensor<fp16, [384]> var_255_to_fp16 = const()[name = string("op_255_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45872512)))];
+            tensor<fp16, [1, ?, 384]> linear_7_cast_fp16 = linear(bias = var_255_to_fp16, weight = var_254_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")];
+            tensor<int32, [4]> k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 384])];
+            tensor<bool, [4]> k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 384]> k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_8)[name = string("k_cache_5_cast_fp16")];
+            tensor<int32, [4]> v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 384])];
+            tensor<bool, [4]> v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 384]> v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_9)[name = string("v_cache_5_cast_fp16")];
+            tensor<int32, [4]> k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 384])];
+            tensor<bool, [4]> k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 384]> k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")];
+            tensor<int32, [4]> v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 384])];
+            tensor<bool, [4]> v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 384]> v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")];
+            int32 var_277 = const()[name = string("op_277"), val = int32(-1)];
+            tensor<int32, [1]> var_295_axes_0 = const()[name = string("op_295_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45873344)))];
+            tensor<fp16, [384]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45874176)))];
+            fp16 var_283_to_fp16 = const()[name = string("op_283_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 384]> var_295_cast_fp16 = layer_norm(axes = var_295_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_295_cast_fp16")];
+            tensor<fp16, [384, 384]> var_306_to_fp16 = const()[name = string("op_306_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45875008)))];
+            tensor<fp16, [384]> var_307_to_fp16 = const()[name = string("op_307_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46169984)))];
+            tensor<fp16, [1, ?, 384]> linear_8_cast_fp16 = linear(bias = var_307_to_fp16, weight = var_306_to_fp16, x = var_295_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<fp16, [384, 384]> var_310_to_fp16 = const()[name = string("op_310_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46170816)))];
+            tensor<fp16, [1, ?, 384]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_310_to_fp16, x = var_295_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [384, 384]> var_314_to_fp16 = const()[name = string("op_314_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46465792)))];
+            tensor<fp16, [384]> var_315_to_fp16 = const()[name = string("op_315_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46760768)))];
+            tensor<fp16, [1, ?, 384]> linear_10_cast_fp16 = linear(bias = var_315_to_fp16, weight = var_314_to_fp16, x = var_295_cast_fp16)[name = string("linear_10_cast_fp16")];
+            tensor<int32, [3]> var_317_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_317_shape_cast_fp16")];
+            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
+            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
+            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
+            string var_317_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_317_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_317_shape_cast_fp16_to_uint16 = cast(dtype = var_317_shape_cast_fp16_to_uint16_dtype_0, x = var_317_shape_cast_fp16)[name = string("cast_52")];
+            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_317_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
+            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_51")];
+            int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")];
+            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")];
+            tensor<int32, [1]> concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor<int32, [1]>([1])];
+            int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
+            bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")];
+            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
+            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_8)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_10_write_state")];
+            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_10 = read_state(input = k_cache1)[name = string("coreml_update_state_10")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_9)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_11_write_state")];
+            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_11 = read_state(input = v_cache1)[name = string("coreml_update_state_11")];
+            int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)];
+            int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(384)];
+            int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)];
+            bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")];
+            tensor<int32, [3]> var_333_begin_0 = const()[name = string("op_333_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_333_end_mask_0 = const()[name = string("op_333_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 384]> var_333_cast_fp16 = slice_by_index(begin = var_333_begin_0, end = concat_32, end_mask = var_333_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_333_cast_fp16")];
+            tensor<int32, [3]> var_336_begin_0 = const()[name = string("op_336_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_336_end_mask_0 = const()[name = string("op_336_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 384]> var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = concat_32, end_mask = var_336_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_336_cast_fp16")];
+            tensor<int32, [4]> concat_34x = const()[name = string("concat_34x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_346_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_346_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_24_to_fp16 = const()[name = string("const_24_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> q_11_cast_fp16 = mul(x = var_346_cast_fp16, y = const_24_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> concat_35x = const()[name = string("concat_35x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_353_cast_fp16 = reshape(shape = concat_35x, x = var_333_cast_fp16)[name = string("op_353_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_25_to_fp16 = const()[name = string("const_25_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> k_15_cast_fp16 = mul(x = var_353_cast_fp16, y = const_25_to_fp16)[name = string("k_15_cast_fp16")];
+            tensor<int32, [4]> concat_36x = const()[name = string("concat_36x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_360_cast_fp16 = reshape(shape = concat_36x, x = var_336_cast_fp16)[name = string("op_360_cast_fp16")];
+            tensor<int32, [4]> var_361 = const()[name = string("op_361"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
+            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, ?]> transpose_38 = transpose(perm = transpose_38_perm_0, x = k_15_cast_fp16)[name = string("transpose_70")];
+            tensor<fp16, [1, 6, ?, 64]> transpose_37 = transpose(perm = transpose_37_perm_0, x = q_11_cast_fp16)[name = string("transpose_71")];
+            tensor<fp16, [1, 6, ?, ?]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_37, y = transpose_38)[name = string("qk_7_cast_fp16")];
+            int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)];
+            int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)];
+            bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")];
+            tensor<int32, [2]> var_364_begin_0 = const()[name = string("op_364_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_364_end_mask_0 = const()[name = string("op_364_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = concat_37, end_mask = var_364_end_mask_0, x = mask_to_fp16)[name = string("op_364_cast_fp16")];
+            int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)];
+            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
+            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")];
+            tensor<int32, [2]> var_365_begin_0 = const()[name = string("op_365_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_365_end_mask_0 = const()[name = string("op_365_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_365_cast_fp16 = slice_by_index(begin = var_365_begin_0, end = concat_38, end_mask = var_365_end_mask_0, x = var_364_cast_fp16)[name = string("op_365_cast_fp16")];
+            tensor<fp16, [1, 6, ?, ?]> qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_365_cast_fp16)[name = string("qk_9_cast_fp16")];
+            tensor<fp16, [1, 6, ?, ?]> var_368_cast_fp16 = softmax(axis = var_277, x = qk_9_cast_fp16)[name = string("op_368_cast_fp16")];
+            bool var_370_transpose_x_0 = const()[name = string("op_370_transpose_x_0"), val = bool(false)];
+            bool var_370_transpose_y_0 = const()[name = string("op_370_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, ?, 64]> v_15_cast_fp16 = transpose(perm = var_361, x = var_360_cast_fp16)[name = string("transpose_72")];
+            tensor<fp16, [1, 6, ?, 64]> var_370_cast_fp16 = matmul(transpose_x = var_370_transpose_x_0, transpose_y = var_370_transpose_y_0, x = var_368_cast_fp16, y = v_15_cast_fp16)[name = string("op_370_cast_fp16")];
+            tensor<int32, [4]> var_371 = const()[name = string("op_371"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_39x = const()[name = string("concat_39x"), val = tensor<int32, [3]>([1, -1, 384])];
+            tensor<fp16, [1, ?, 6, 64]> var_372_cast_fp16 = transpose(perm = var_371, x = var_370_cast_fp16)[name = string("transpose_69")];
+            tensor<fp16, [1, ?, 384]> x_25_cast_fp16 = reshape(shape = concat_39x, x = var_372_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<fp16, [384, 384]> var_376_to_fp16 = const()[name = string("op_376_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46761600)))];
+            tensor<fp16, [384]> var_377_to_fp16 = const()[name = string("op_377_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47056576)))];
+            tensor<fp16, [1, ?, 384]> linear_11_cast_fp16 = linear(bias = var_377_to_fp16, weight = var_376_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")];
+            tensor<int32, [1]> var_384_axes_0 = const()[name = string("op_384_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47057408)))];
+            tensor<fp16, [384]> blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47058240)))];
+            tensor<fp16, [1, ?, 384]> var_384_cast_fp16 = layer_norm(axes = var_384_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_384_cast_fp16")];
+            tensor<fp16, [384, 384]> var_393_to_fp16 = const()[name = string("op_393_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47059072)))];
+            tensor<fp16, [384]> var_394_to_fp16 = const()[name = string("op_394_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47354048)))];
+            tensor<fp16, [1, ?, 384]> linear_12_cast_fp16 = linear(bias = var_394_to_fp16, weight = var_393_to_fp16, x = var_384_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<int32, [3]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 384]> k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_42 = const()[name = string("concat_42"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 384]> v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_44x = const()[name = string("concat_44x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_414_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_414_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_26_to_fp16 = const()[name = string("const_26_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> q_15_cast_fp16 = mul(x = var_414_cast_fp16, y = const_26_to_fp16)[name = string("q_15_cast_fp16")];
+            tensor<int32, [4]> var_420 = const()[name = string("op_420"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_421_cast_fp16 = reshape(shape = var_420, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_421_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_27_to_fp16 = const()[name = string("const_27_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> k_19_cast_fp16 = mul(x = var_421_cast_fp16, y = const_27_to_fp16)[name = string("k_19_cast_fp16")];
+            tensor<int32, [4]> var_427 = const()[name = string("op_427"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_428_cast_fp16 = reshape(shape = var_427, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_428_cast_fp16")];
+            tensor<int32, [4]> var_429 = const()[name = string("op_429"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
+            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_39_perm_0 = const()[name = string("transpose_39_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, 1500]> transpose_40 = transpose(perm = transpose_40_perm_0, x = k_19_cast_fp16)[name = string("transpose_66")];
+            tensor<fp16, [1, 6, ?, 64]> transpose_39 = transpose(perm = transpose_39_perm_0, x = q_15_cast_fp16)[name = string("transpose_67")];
+            tensor<fp16, [1, 6, ?, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_39, y = transpose_40)[name = string("qk_11_cast_fp16")];
+            tensor<fp16, [1, 6, ?, 1500]> var_433_cast_fp16 = softmax(axis = var_277, x = qk_11_cast_fp16)[name = string("op_433_cast_fp16")];
+            bool var_435_transpose_x_0 = const()[name = string("op_435_transpose_x_0"), val = bool(false)];
+            bool var_435_transpose_y_0 = const()[name = string("op_435_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_429, x = var_428_cast_fp16)[name = string("transpose_68")];
+            tensor<fp16, [1, 6, ?, 64]> var_435_cast_fp16 = matmul(transpose_x = var_435_transpose_x_0, transpose_y = var_435_transpose_y_0, x = var_433_cast_fp16, y = v_19_cast_fp16)[name = string("op_435_cast_fp16")];
+            tensor<int32, [4]> var_436 = const()[name = string("op_436"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_45x = const()[name = string("concat_45x"), val = tensor<int32, [3]>([1, -1, 384])];
+            tensor<fp16, [1, ?, 6, 64]> var_437_cast_fp16 = transpose(perm = var_436, x = var_435_cast_fp16)[name = string("transpose_65")];
+            tensor<fp16, [1, ?, 384]> x_31_cast_fp16 = reshape(shape = concat_45x, x = var_437_cast_fp16)[name = string("x_31_cast_fp16")];
+            tensor<fp16, [384, 384]> var_441_to_fp16 = const()[name = string("op_441_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47354880)))];
+            tensor<fp16, [384]> var_442_to_fp16 = const()[name = string("op_442_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47649856)))];
+            tensor<fp16, [1, ?, 384]> linear_13_cast_fp16 = linear(bias = var_442_to_fp16, weight = var_441_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")];
+            tensor<int32, [1]> var_449_axes_0 = const()[name = string("op_449_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47650688)))];
+            tensor<fp16, [384]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47651520)))];
+            tensor<fp16, [1, ?, 384]> var_449_cast_fp16 = layer_norm(axes = var_449_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_449_cast_fp16")];
+            tensor<fp16, [1536, 384]> var_458_to_fp16 = const()[name = string("op_458_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47652352)))];
+            tensor<fp16, [1536]> var_459_to_fp16 = const()[name = string("op_459_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48832064)))];
+            tensor<fp16, [1, ?, 1536]> linear_14_cast_fp16 = linear(bias = var_459_to_fp16, weight = var_458_to_fp16, x = var_449_cast_fp16)[name = string("linear_14_cast_fp16")];
+            string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 1536]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<fp16, [384, 1536]> var_464_to_fp16 = const()[name = string("op_464_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48835200)))];
+            tensor<fp16, [384]> var_465_to_fp16 = const()[name = string("op_465_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50014912)))];
+            tensor<fp16, [1, ?, 384]> linear_15_cast_fp16 = linear(bias = var_465_to_fp16, weight = var_464_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")];
+            tensor<int32, [4]> k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 384])];
+            tensor<bool, [4]> k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 384]> k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_10)[name = string("k_cache_9_cast_fp16")];
+            tensor<int32, [4]> v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 384])];
+            tensor<bool, [4]> v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 384]> v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_11)[name = string("v_cache_9_cast_fp16")];
+            tensor<int32, [4]> k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 384])];
+            tensor<bool, [4]> k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 384]> k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")];
+            tensor<int32, [4]> v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 384])];
+            tensor<bool, [4]> v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 384]> v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")];
+            int32 var_487 = const()[name = string("op_487"), val = int32(-1)];
+            tensor<int32, [1]> var_505_axes_0 = const()[name = string("op_505_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50015744)))];
+            tensor<fp16, [384]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50016576)))];
+            fp16 var_493_to_fp16 = const()[name = string("op_493_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 384]> var_505_cast_fp16 = layer_norm(axes = var_505_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_493_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_505_cast_fp16")];
+            tensor<fp16, [384, 384]> var_516_to_fp16 = const()[name = string("op_516_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50017408)))];
+            tensor<fp16, [384]> var_517_to_fp16 = const()[name = string("op_517_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50312384)))];
+            tensor<fp16, [1, ?, 384]> linear_16_cast_fp16 = linear(bias = var_517_to_fp16, weight = var_516_to_fp16, x = var_505_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [384, 384]> var_520_to_fp16 = const()[name = string("op_520_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50313216)))];
+            tensor<fp16, [1, ?, 384]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_520_to_fp16, x = var_505_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [384, 384]> var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50608192)))];
+            tensor<fp16, [384]> var_525_to_fp16 = const()[name = string("op_525_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50903168)))];
+            tensor<fp16, [1, ?, 384]> linear_18_cast_fp16 = linear(bias = var_525_to_fp16, weight = var_524_to_fp16, x = var_505_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<int32, [3]> var_527_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_527_shape_cast_fp16")];
+            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
+            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
+            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
+            string var_527_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_527_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_527_shape_cast_fp16_to_uint16 = cast(dtype = var_527_shape_cast_fp16_to_uint16_dtype_0, x = var_527_shape_cast_fp16)[name = string("cast_50")];
+            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_527_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
+            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_49")];
+            int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")];
+            tensor<int32, [1]> expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")];
+            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([2])];
+            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
+            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")];
+            tensor<int32, [1]> concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)];
+            bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_10)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_12_write_state")];
+            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_12 = read_state(input = k_cache1)[name = string("coreml_update_state_12")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_11)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_13_write_state")];
+            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_13 = read_state(input = v_cache1)[name = string("coreml_update_state_13")];
+            int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)];
+            int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(384)];
+            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
+            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")];
+            tensor<int32, [3]> var_543_begin_0 = const()[name = string("op_543_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_543_end_mask_0 = const()[name = string("op_543_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 384]> var_543_cast_fp16 = slice_by_index(begin = var_543_begin_0, end = concat_54, end_mask = var_543_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_543_cast_fp16")];
+            tensor<int32, [3]> var_546_begin_0 = const()[name = string("op_546_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_546_end_mask_0 = const()[name = string("op_546_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 384]> var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = concat_54, end_mask = var_546_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_546_cast_fp16")];
+            tensor<int32, [4]> concat_56x = const()[name = string("concat_56x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_556_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_556_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_28_to_fp16 = const()[name = string("const_28_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> q_19_cast_fp16 = mul(x = var_556_cast_fp16, y = const_28_to_fp16)[name = string("q_19_cast_fp16")];
+            tensor<int32, [4]> concat_57x = const()[name = string("concat_57x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_563_cast_fp16 = reshape(shape = concat_57x, x = var_543_cast_fp16)[name = string("op_563_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> k_25_cast_fp16 = mul(x = var_563_cast_fp16, y = const_29_to_fp16)[name = string("k_25_cast_fp16")];
+            tensor<int32, [4]> concat_58x = const()[name = string("concat_58x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_570_cast_fp16 = reshape(shape = concat_58x, x = var_546_cast_fp16)[name = string("op_570_cast_fp16")];
+            tensor<int32, [4]> var_571 = const()[name = string("op_571"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
+            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, ?]> transpose_42 = transpose(perm = transpose_42_perm_0, x = k_25_cast_fp16)[name = string("transpose_62")];
+            tensor<fp16, [1, 6, ?, 64]> transpose_41 = transpose(perm = transpose_41_perm_0, x = q_19_cast_fp16)[name = string("transpose_63")];
+            tensor<fp16, [1, 6, ?, ?]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_41, y = transpose_42)[name = string("qk_13_cast_fp16")];
+            int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)];
+            int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)];
+            bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")];
+            tensor<int32, [2]> var_574_begin_0 = const()[name = string("op_574_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_574_end_mask_0 = const()[name = string("op_574_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = concat_59, end_mask = var_574_end_mask_0, x = mask_to_fp16)[name = string("op_574_cast_fp16")];
+            int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)];
+            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
+            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")];
+            tensor<int32, [2]> var_575_begin_0 = const()[name = string("op_575_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_575_end_mask_0 = const()[name = string("op_575_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_575_cast_fp16 = slice_by_index(begin = var_575_begin_0, end = concat_60, end_mask = var_575_end_mask_0, x = var_574_cast_fp16)[name = string("op_575_cast_fp16")];
+            tensor<fp16, [1, 6, ?, ?]> qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_575_cast_fp16)[name = string("qk_15_cast_fp16")];
+            tensor<fp16, [1, 6, ?, ?]> var_578_cast_fp16 = softmax(axis = var_487, x = qk_15_cast_fp16)[name = string("op_578_cast_fp16")];
+            bool var_580_transpose_x_0 = const()[name = string("op_580_transpose_x_0"), val = bool(false)];
+            bool var_580_transpose_y_0 = const()[name = string("op_580_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, ?, 64]> v_25_cast_fp16 = transpose(perm = var_571, x = var_570_cast_fp16)[name = string("transpose_64")];
+            tensor<fp16, [1, 6, ?, 64]> var_580_cast_fp16 = matmul(transpose_x = var_580_transpose_x_0, transpose_y = var_580_transpose_y_0, x = var_578_cast_fp16, y = v_25_cast_fp16)[name = string("op_580_cast_fp16")];
+            tensor<int32, [4]> var_581 = const()[name = string("op_581"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_61x = const()[name = string("concat_61x"), val = tensor<int32, [3]>([1, -1, 384])];
+            tensor<fp16, [1, ?, 6, 64]> var_582_cast_fp16 = transpose(perm = var_581, x = var_580_cast_fp16)[name = string("transpose_61")];
+            tensor<fp16, [1, ?, 384]> x_43_cast_fp16 = reshape(shape = concat_61x, x = var_582_cast_fp16)[name = string("x_43_cast_fp16")];
+            tensor<fp16, [384, 384]> var_586_to_fp16 = const()[name = string("op_586_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50904000)))];
+            tensor<fp16, [384]> var_587_to_fp16 = const()[name = string("op_587_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51198976)))];
+            tensor<fp16, [1, ?, 384]> linear_19_cast_fp16 = linear(bias = var_587_to_fp16, weight = var_586_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")];
+            tensor<int32, [1]> var_594_axes_0 = const()[name = string("op_594_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51199808)))];
+            tensor<fp16, [384]> blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51200640)))];
+            tensor<fp16, [1, ?, 384]> var_594_cast_fp16 = layer_norm(axes = var_594_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_493_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_594_cast_fp16")];
+            tensor<fp16, [384, 384]> var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51201472)))];
+            tensor<fp16, [384]> var_604_to_fp16 = const()[name = string("op_604_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51496448)))];
+            tensor<fp16, [1, ?, 384]> linear_20_cast_fp16 = linear(bias = var_604_to_fp16, weight = var_603_to_fp16, x = var_594_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [3]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_63 = const()[name = string("concat_63"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 384]> k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_64 = const()[name = string("concat_64"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 384]> v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_66x = const()[name = string("concat_66x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_624_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_624_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> q_23_cast_fp16 = mul(x = var_624_cast_fp16, y = const_30_to_fp16)[name = string("q_23_cast_fp16")];
+            tensor<int32, [4]> var_630 = const()[name = string("op_630"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_631_cast_fp16 = reshape(shape = var_630, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_631_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> k_29_cast_fp16 = mul(x = var_631_cast_fp16, y = const_31_to_fp16)[name = string("k_29_cast_fp16")];
+            tensor<int32, [4]> var_637 = const()[name = string("op_637"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_638_cast_fp16 = reshape(shape = var_637, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_638_cast_fp16")];
+            tensor<int32, [4]> var_639 = const()[name = string("op_639"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
+            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_43_perm_0 = const()[name = string("transpose_43_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, 1500]> transpose_44 = transpose(perm = transpose_44_perm_0, x = k_29_cast_fp16)[name = string("transpose_58")];
+            tensor<fp16, [1, 6, ?, 64]> transpose_43 = transpose(perm = transpose_43_perm_0, x = q_23_cast_fp16)[name = string("transpose_59")];
+            tensor<fp16, [1, 6, ?, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_43, y = transpose_44)[name = string("qk_17_cast_fp16")];
+            tensor<fp16, [1, 6, ?, 1500]> var_643_cast_fp16 = softmax(axis = var_487, x = qk_17_cast_fp16)[name = string("op_643_cast_fp16")];
+            bool var_645_transpose_x_0 = const()[name = string("op_645_transpose_x_0"), val = bool(false)];
+            bool var_645_transpose_y_0 = const()[name = string("op_645_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, 1500, 64]> v_29_cast_fp16 = transpose(perm = var_639, x = var_638_cast_fp16)[name = string("transpose_60")];
+            tensor<fp16, [1, 6, ?, 64]> var_645_cast_fp16 = matmul(transpose_x = var_645_transpose_x_0, transpose_y = var_645_transpose_y_0, x = var_643_cast_fp16, y = v_29_cast_fp16)[name = string("op_645_cast_fp16")];
+            tensor<int32, [4]> var_646 = const()[name = string("op_646"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_67x = const()[name = string("concat_67x"), val = tensor<int32, [3]>([1, -1, 384])];
+            tensor<fp16, [1, ?, 6, 64]> var_647_cast_fp16 = transpose(perm = var_646, x = var_645_cast_fp16)[name = string("transpose_57")];
+            tensor<fp16, [1, ?, 384]> x_49_cast_fp16 = reshape(shape = concat_67x, x = var_647_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<fp16, [384, 384]> var_651_to_fp16 = const()[name = string("op_651_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51497280)))];
+            tensor<fp16, [384]> var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51792256)))];
+            tensor<fp16, [1, ?, 384]> linear_21_cast_fp16 = linear(bias = var_652_to_fp16, weight = var_651_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")];
+            tensor<int32, [1]> var_659_axes_0 = const()[name = string("op_659_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51793088)))];
+            tensor<fp16, [384]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51793920)))];
+            tensor<fp16, [1, ?, 384]> var_659_cast_fp16 = layer_norm(axes = var_659_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_493_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_659_cast_fp16")];
+            tensor<fp16, [1536, 384]> var_668_to_fp16 = const()[name = string("op_668_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51794752)))];
+            tensor<fp16, [1536]> var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52974464)))];
+            tensor<fp16, [1, ?, 1536]> linear_22_cast_fp16 = linear(bias = var_669_to_fp16, weight = var_668_to_fp16, x = var_659_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 1536]> x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")];
+            tensor<fp16, [384, 1536]> var_674_to_fp16 = const()[name = string("op_674_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52977600)))];
+            tensor<fp16, [384]> var_675_to_fp16 = const()[name = string("op_675_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54157312)))];
+            tensor<fp16, [1, ?, 384]> linear_23_cast_fp16 = linear(bias = var_675_to_fp16, weight = var_674_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")];
+            tensor<int32, [4]> k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 384])];
+            tensor<bool, [4]> k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 384]> k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_12)[name = string("k_cache_13_cast_fp16")];
+            tensor<int32, [4]> v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 384])];
+            tensor<bool, [4]> v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 448, 384]> v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_13)[name = string("v_cache_13_cast_fp16")];
+            tensor<int32, [4]> k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 384])];
+            tensor<bool, [4]> k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 384]> k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")];
+            tensor<int32, [4]> v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 384])];
+            tensor<bool, [4]> v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<bool, [4]> v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [1, 1500, 384]> v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")];
+            int32 var_697 = const()[name = string("op_697"), val = int32(-1)];
+            tensor<int32, [1]> var_715_axes_0 = const()[name = string("op_715_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54158144)))];
+            tensor<fp16, [384]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54158976)))];
+            fp16 var_703_to_fp16 = const()[name = string("op_703_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 384]> var_715_cast_fp16 = layer_norm(axes = var_715_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_703_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_715_cast_fp16")];
+            tensor<fp16, [384, 384]> var_726_to_fp16 = const()[name = string("op_726_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54159808)))];
+            tensor<fp16, [384]> var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54454784)))];
+            tensor<fp16, [1, ?, 384]> linear_24_cast_fp16 = linear(bias = var_727_to_fp16, weight = var_726_to_fp16, x = var_715_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [384, 384]> var_730_to_fp16 = const()[name = string("op_730_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54455616)))];
+            tensor<fp16, [1, ?, 384]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_730_to_fp16, x = var_715_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<fp16, [384, 384]> var_734_to_fp16 = const()[name = string("op_734_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54750592)))];
+            tensor<fp16, [384]> var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55045568)))];
+            tensor<fp16, [1, ?, 384]> linear_26_cast_fp16 = linear(bias = var_735_to_fp16, weight = var_734_to_fp16, x = var_715_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [3]> var_737_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_737_shape_cast_fp16")];
+            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
+            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
+            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
+            string var_737_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_737_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
+            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
+            tensor<uint16, [3]> var_737_shape_cast_fp16_to_uint16 = cast(dtype = var_737_shape_cast_fp16_to_uint16_dtype_0, x = var_737_shape_cast_fp16)[name = string("cast_48")];
+            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_737_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
+            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
+            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_47")];
+            int32 end_step = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step")];
+            tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step)[name = string("expand_dims_51")];
+            tensor<int32, [1]> concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor<int32, [1]>([3])];
+            int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)];
+            bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")];
+            tensor<int32, [1]> concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1]> concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor<int32, [1]>([0])];
+            int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)];
+            bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)];
+            tensor<int32, [4]> concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")];
+            tensor<int32, [4]> k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_12)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")];
+            tensor<int32, [4]> v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
+            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
+            tensor<fp16, [4, 1, 448, 384]> v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_13)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")];
+            write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")];
+            int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)];
+            int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(384)];
+            int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)];
+            bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)];
+            tensor<int32, [3]> concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step, concat_76_values2_0))[name = string("concat_76")];
+            tensor<int32, [3]> var_753_begin_0 = const()[name = string("op_753_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_753_end_mask_0 = const()[name = string("op_753_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 384]> var_753_cast_fp16 = slice_by_index(begin = var_753_begin_0, end = concat_76, end_mask = var_753_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_753_cast_fp16")];
+            tensor<int32, [3]> var_756_begin_0 = const()[name = string("op_756_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<bool, [3]> var_756_end_mask_0 = const()[name = string("op_756_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<fp16, [1, ?, 384]> var_756_cast_fp16 = slice_by_index(begin = var_756_begin_0, end = concat_76, end_mask = var_756_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_756_cast_fp16")];
+            tensor<int32, [4]> concat_78x = const()[name = string("concat_78x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_766_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_766_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> q_27_cast_fp16 = mul(x = var_766_cast_fp16, y = const_32_to_fp16)[name = string("q_27_cast_fp16")];
+            tensor<int32, [4]> concat_79x = const()[name = string("concat_79x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_773_cast_fp16 = reshape(shape = concat_79x, x = var_753_cast_fp16)[name = string("op_773_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> k_35_cast_fp16 = mul(x = var_773_cast_fp16, y = const_33_to_fp16)[name = string("k_35_cast_fp16")];
+            tensor<int32, [4]> concat_80x = const()[name = string("concat_80x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_780_cast_fp16 = reshape(shape = concat_80x, x = var_756_cast_fp16)[name = string("op_780_cast_fp16")];
+            tensor<int32, [4]> var_781 = const()[name = string("op_781"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
+            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, ?]> transpose_46 = transpose(perm = transpose_46_perm_0, x = k_35_cast_fp16)[name = string("transpose_54")];
+            tensor<fp16, [1, 6, ?, 64]> transpose_45 = transpose(perm = transpose_45_perm_0, x = q_27_cast_fp16)[name = string("transpose_55")];
+            tensor<fp16, [1, 6, ?, ?]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_45, y = transpose_46)[name = string("qk_19_cast_fp16")];
+            int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)];
+            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
+            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")];
+            tensor<int32, [2]> var_784_begin_0 = const()[name = string("op_784_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_784_end_mask_0 = const()[name = string("op_784_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+            tensor<fp16, [?, 448]> var_784_cast_fp16 = slice_by_index(begin = var_784_begin_0, end = concat_81, end_mask = var_784_end_mask_0, x = mask_to_fp16)[name = string("op_784_cast_fp16")];
+            int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)];
+            int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)];
+            bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)];
+            tensor<int32, [2]> concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")];
+            tensor<int32, [2]> var_785_begin_0 = const()[name = string("op_785_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<bool, [2]> var_785_end_mask_0 = const()[name = string("op_785_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [?, ?]> var_785_cast_fp16 = slice_by_index(begin = var_785_begin_0, end = concat_82, end_mask = var_785_end_mask_0, x = var_784_cast_fp16)[name = string("op_785_cast_fp16")];
+            tensor<fp16, [1, 6, ?, ?]> qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_785_cast_fp16)[name = string("qk_21_cast_fp16")];
+            tensor<fp16, [1, 6, ?, ?]> var_788_cast_fp16 = softmax(axis = var_697, x = qk_21_cast_fp16)[name = string("op_788_cast_fp16")];
+            bool var_790_transpose_x_0 = const()[name = string("op_790_transpose_x_0"), val = bool(false)];
+            bool var_790_transpose_y_0 = const()[name = string("op_790_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, ?, 64]> v_35_cast_fp16 = transpose(perm = var_781, x = var_780_cast_fp16)[name = string("transpose_56")];
+            tensor<fp16, [1, 6, ?, 64]> var_790_cast_fp16 = matmul(transpose_x = var_790_transpose_x_0, transpose_y = var_790_transpose_y_0, x = var_788_cast_fp16, y = v_35_cast_fp16)[name = string("op_790_cast_fp16")];
+            tensor<int32, [4]> var_791 = const()[name = string("op_791"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_83x = const()[name = string("concat_83x"), val = tensor<int32, [3]>([1, -1, 384])];
+            tensor<fp16, [1, ?, 6, 64]> var_792_cast_fp16 = transpose(perm = var_791, x = var_790_cast_fp16)[name = string("transpose_53")];
+            tensor<fp16, [1, ?, 384]> x_61_cast_fp16 = reshape(shape = concat_83x, x = var_792_cast_fp16)[name = string("x_61_cast_fp16")];
+            tensor<fp16, [384, 384]> var_796_to_fp16 = const()[name = string("op_796_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55046400)))];
+            tensor<fp16, [384]> var_797_to_fp16 = const()[name = string("op_797_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55341376)))];
+            tensor<fp16, [1, ?, 384]> linear_27_cast_fp16 = linear(bias = var_797_to_fp16, weight = var_796_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")];
+            tensor<int32, [1]> var_804_axes_0 = const()[name = string("op_804_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55342208)))];
+            tensor<fp16, [384]> blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55343040)))];
+            tensor<fp16, [1, ?, 384]> var_804_cast_fp16 = layer_norm(axes = var_804_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_703_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_804_cast_fp16")];
+            tensor<fp16, [384, 384]> var_813_to_fp16 = const()[name = string("op_813_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55343872)))];
+            tensor<fp16, [384]> var_814_to_fp16 = const()[name = string("op_814_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55638848)))];
+            tensor<fp16, [1, ?, 384]> linear_28_cast_fp16 = linear(bias = var_814_to_fp16, weight = var_813_to_fp16, x = var_804_cast_fp16)[name = string("linear_28_cast_fp16")];
+            tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 384]> k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [3]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> concat_87 = const()[name = string("concat_87"), val = tensor<int32, [3]>([0, 1500, 0])];
+            tensor<int32, [3]> v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
+            tensor<bool, [3]> v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
+            tensor<fp16, [1, 1500, 384]> v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")];
+            tensor<int32, [4]> concat_88x = const()[name = string("concat_88x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
+            tensor<fp16, [1, ?, 6, 64]> var_834_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_834_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, ?, 6, 64]> q_cast_fp16 = mul(x = var_834_cast_fp16, y = const_34_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_840 = const()[name = string("op_840"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_841_cast_fp16 = reshape(shape = var_840, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_841_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> k_cast_fp16 = mul(x = var_841_cast_fp16, y = const_35_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_847 = const()[name = string("op_847"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_848_cast_fp16 = reshape(shape = var_847, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_848_cast_fp16")];
+            tensor<int32, [4]> var_849 = const()[name = string("op_849"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, 1500]> transpose_48 = transpose(perm = transpose_48_perm_0, x = k_cast_fp16)[name = string("transpose_50")];
+            tensor<fp16, [1, 6, ?, 64]> transpose_47 = transpose(perm = transpose_47_perm_0, x = q_cast_fp16)[name = string("transpose_51")];
+            tensor<fp16, [1, 6, ?, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_47, y = transpose_48)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 6, ?, 1500]> var_853_cast_fp16 = softmax(axis = var_697, x = qk_cast_fp16)[name = string("op_853_cast_fp16")];
+            bool var_855_transpose_x_0 = const()[name = string("op_855_transpose_x_0"), val = bool(false)];
+            bool var_855_transpose_y_0 = const()[name = string("op_855_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, 1500, 64]> v_cast_fp16 = transpose(perm = var_849, x = var_848_cast_fp16)[name = string("transpose_52")];
+            tensor<fp16, [1, 6, ?, 64]> var_855_cast_fp16 = matmul(transpose_x = var_855_transpose_x_0, transpose_y = var_855_transpose_y_0, x = var_853_cast_fp16, y = v_cast_fp16)[name = string("op_855_cast_fp16")];
+            tensor<int32, [4]> var_856 = const()[name = string("op_856"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_89x = const()[name = string("concat_89x"), val = tensor<int32, [3]>([1, -1, 384])];
+            tensor<fp16, [1, ?, 6, 64]> var_857_cast_fp16 = transpose(perm = var_856, x = var_855_cast_fp16)[name = string("transpose_49")];
+            tensor<fp16, [1, ?, 384]> x_67_cast_fp16 = reshape(shape = concat_89x, x = var_857_cast_fp16)[name = string("x_67_cast_fp16")];
+            tensor<fp16, [384, 384]> var_861_to_fp16 = const()[name = string("op_861_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55639680)))];
+            tensor<fp16, [384]> var_862_to_fp16 = const()[name = string("op_862_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55934656)))];
+            tensor<fp16, [1, ?, 384]> linear_29_cast_fp16 = linear(bias = var_862_to_fp16, weight = var_861_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")];
+            tensor<int32, [1]> var_869_axes_0 = const()[name = string("op_869_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55935488)))];
+            tensor<fp16, [384]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55936320)))];
+            tensor<fp16, [1, ?, 384]> var_869_cast_fp16 = layer_norm(axes = var_869_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_703_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_869_cast_fp16")];
+            tensor<fp16, [1536, 384]> var_878_to_fp16 = const()[name = string("op_878_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55937152)))];
+            tensor<fp16, [1536]> var_879_to_fp16 = const()[name = string("op_879_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57116864)))];
+            tensor<fp16, [1, ?, 1536]> linear_30_cast_fp16 = linear(bias = var_879_to_fp16, weight = var_878_to_fp16, x = var_869_cast_fp16)[name = string("linear_30_cast_fp16")];
+            string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, ?, 1536]> x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")];
+            tensor<fp16, [384, 1536]> var_884_to_fp16 = const()[name = string("op_884_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57120000)))];
+            tensor<fp16, [384]> var_885_to_fp16 = const()[name = string("op_885_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58299712)))];
+            tensor<fp16, [1, ?, 384]> linear_31_cast_fp16 = linear(bias = var_885_to_fp16, weight = var_884_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<fp16, [1, ?, 384]> x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")];
+            tensor<int32, [1]> var_898_axes_0 = const()[name = string("op_898_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58300544)))];
+            tensor<fp16, [384]> ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58301376)))];
+            fp16 var_889_to_fp16 = const()[name = string("op_889_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, ?, 384]> var_898_cast_fp16 = layer_norm(axes = var_898_axes_0, beta = ln_bias_to_fp16, epsilon = var_889_to_fp16, gamma = ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_898_cast_fp16")];
+            tensor<fp16, [51865]> var_908_bias_0_to_fp16 = const()[name = string("op_908_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58302208)))];
+            tensor<fp16, [1, ?, 51865]> logits = linear(bias = var_908_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_898_cast_fp16)[name = string("op_908_cast_fp16")];
+        } -> (logits);
+}
\ No newline at end of file
diff --git a/tiny/decoder_second.mlmodelc/weights/weight.bin b/tiny/decoder_second.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7e6d7af3645a778a74ca9a4c32ca95f8726e841b
--- /dev/null
+++ b/tiny/decoder_second.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c4929371005f27adeec8f37f375227b8128a408fdd9200141e55bcb60b15ecd
+size 58406002
diff --git a/tiny/encoder.mlmodelc/analytics/coremldata.bin b/tiny/encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c039acf268199f7868ea911d5bfaf2980a0a085f
--- /dev/null
+++ b/tiny/encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:380e64d472e815b7206b93221ca71457f93ca000336ae30076e221b957869230
+size 243
diff --git a/tiny/encoder.mlmodelc/coremldata.bin b/tiny/encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c562185638877041b6e19bba7a231c397aab67df
--- /dev/null
+++ b/tiny/encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e0e4ad833efbbc0cff4791bce7b7852af5d36a9f32212d93cf63a454e89733b
+size 318
diff --git a/tiny/encoder.mlmodelc/metadata.json b/tiny/encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..96e3fa505bf060e5217b13131e775ff07cdf3cc2
--- /dev/null
+++ b/tiny/encoder.mlmodelc/metadata.json
@@ -0,0 +1,69 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500 × 384)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 384]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.mul" : 8,
+      "Ios18.softmax" : 4,
+      "Ios18.linear" : 24,
+      "Ios18.gelu" : 6,
+      "Ios18.layerNorm" : 9,
+      "Ios18.transpose" : 17,
+      "Ios18.matmul" : 8,
+      "Ios18.conv" : 2,
+      "Ios18.add" : 9,
+      "Ios18.reshape" : 16
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.0",
+      "com.github.apple.coremltools.source" : "torch==2.4.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "encoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/tiny/encoder.mlmodelc/model.mil b/tiny/encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..a714d7f89a7b947d00e60c58452d3233aa2c3fd2
--- /dev/null
+++ b/tiny/encoder.mlmodelc/model.mil
@@ -0,0 +1,268 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 80, 3000]> logmel_data) {
+            string var_28_pad_type_0 = const()[name = string("op_28_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_28_pad_0 = const()[name = string("op_28_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_28_strides_0 = const()[name = string("op_28_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_28_dilations_0 = const()[name = string("op_28_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_28_groups_0 = const()[name = string("op_28_groups_0"), val = int32(1)];
+            tensor<fp16, [384, 80, 3]> weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor<fp16, [384, 80, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [384]> bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184448)))];
+            tensor<fp16, [1, 384, 3000]> var_28_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_28_dilations_0, groups = var_28_groups_0, pad = var_28_pad_0, pad_type = var_28_pad_type_0, strides = var_28_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_28_cast_fp16")];
+            string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 384, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_28_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_46_pad_type_0 = const()[name = string("op_46_pad_type_0"), val = string("custom")];
+            tensor<int32, [2]> var_46_pad_0 = const()[name = string("op_46_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_46_strides_0 = const()[name = string("op_46_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_46_dilations_0 = const()[name = string("op_46_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 var_46_groups_0 = const()[name = string("op_46_groups_0"), val = int32(1)];
+            tensor<fp16, [384, 384, 3]> weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor<fp16, [384, 384, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185280)))];
+            tensor<fp16, [384]> bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1070080)))];
+            tensor<fp16, [1, 384, 1500]> var_46_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_46_dilations_0, groups = var_46_groups_0, pad = var_46_pad_0, pad_type = var_46_pad_type_0, strides = var_46_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_46_cast_fp16")];
+            string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 384, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_46_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<int32, [3]> var_52 = const()[name = string("op_52"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [1500, 384]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [1500, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1070912)))];
+            tensor<fp16, [1, 1500, 384]> x_5_cast_fp16 = transpose(perm = var_52, x = x_3_cast_fp16)[name = string("transpose_40")];
+            tensor<fp16, [1, 1500, 384]> var_55_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_55_cast_fp16")];
+            int32 var_67 = const()[name = string("op_67"), val = int32(-1)];
+            tensor<int32, [1]> var_83_axes_0 = const()[name = string("op_83_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2222976)))];
+            tensor<fp16, [384]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2223808)))];
+            fp16 var_73_to_fp16 = const()[name = string("op_73_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 384]> var_83_cast_fp16 = layer_norm(axes = var_83_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_55_cast_fp16)[name = string("op_83_cast_fp16")];
+            tensor<fp16, [384, 384]> var_94_to_fp16 = const()[name = string("op_94_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2224640)))];
+            tensor<fp16, [384]> var_95_to_fp16 = const()[name = string("op_95_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2519616)))];
+            tensor<fp16, [1, 1500, 384]> linear_0_cast_fp16 = linear(bias = var_95_to_fp16, weight = var_94_to_fp16, x = var_83_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [384, 384]> var_98_to_fp16 = const()[name = string("op_98_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2520448)))];
+            tensor<fp16, [384]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2815424)))];
+            tensor<fp16, [1, 1500, 384]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_98_to_fp16, x = var_83_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<fp16, [384, 384]> var_102_to_fp16 = const()[name = string("op_102_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2816256)))];
+            tensor<fp16, [384]> var_103_to_fp16 = const()[name = string("op_103_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3111232)))];
+            tensor<fp16, [1, 1500, 384]> linear_2_cast_fp16 = linear(bias = var_103_to_fp16, weight = var_102_to_fp16, x = var_83_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_111 = const()[name = string("op_111"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_112_cast_fp16 = reshape(shape = var_111, x = linear_0_cast_fp16)[name = string("op_112_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_28_to_fp16 = const()[name = string("const_28_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> q_3_cast_fp16 = mul(x = var_112_cast_fp16, y = const_28_to_fp16)[name = string("q_3_cast_fp16")];
+            tensor<int32, [4]> var_118 = const()[name = string("op_118"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_119_cast_fp16 = reshape(shape = var_118, x = linear_1_cast_fp16)[name = string("op_119_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> k_3_cast_fp16 = mul(x = var_119_cast_fp16, y = const_29_to_fp16)[name = string("k_3_cast_fp16")];
+            tensor<int32, [4]> var_125 = const()[name = string("op_125"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_126_cast_fp16 = reshape(shape = var_125, x = linear_2_cast_fp16)[name = string("op_126_cast_fp16")];
+            tensor<int32, [4]> var_127 = const()[name = string("op_127"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
+            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, 1500]> transpose_17 = transpose(perm = transpose_17_perm_0, x = k_3_cast_fp16)[name = string("transpose_37")];
+            tensor<fp16, [1, 6, 1500, 64]> transpose_16 = transpose(perm = transpose_16_perm_0, x = q_3_cast_fp16)[name = string("transpose_38")];
+            tensor<fp16, [1, 6, 1500, 1500]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_16, y = transpose_17)[name = string("qk_1_cast_fp16")];
+            tensor<fp16, [1, 6, 1500, 1500]> var_131_cast_fp16 = softmax(axis = var_67, x = qk_1_cast_fp16)[name = string("op_131_cast_fp16")];
+            bool var_133_transpose_x_0 = const()[name = string("op_133_transpose_x_0"), val = bool(false)];
+            bool var_133_transpose_y_0 = const()[name = string("op_133_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, 1500, 64]> v_3_cast_fp16 = transpose(perm = var_127, x = var_126_cast_fp16)[name = string("transpose_39")];
+            tensor<fp16, [1, 6, 1500, 64]> var_133_cast_fp16 = matmul(transpose_x = var_133_transpose_x_0, transpose_y = var_133_transpose_y_0, x = var_131_cast_fp16, y = v_3_cast_fp16)[name = string("op_133_cast_fp16")];
+            tensor<int32, [4]> var_134 = const()[name = string("op_134"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_0 = const()[name = string("concat_0"), val = tensor<int32, [3]>([1, 1500, 384])];
+            tensor<fp16, [1, 1500, 6, 64]> var_135_cast_fp16 = transpose(perm = var_134, x = var_133_cast_fp16)[name = string("transpose_36")];
+            tensor<fp16, [1, 1500, 384]> x_11_cast_fp16 = reshape(shape = concat_0, x = var_135_cast_fp16)[name = string("x_11_cast_fp16")];
+            tensor<fp16, [384, 384]> var_139_to_fp16 = const()[name = string("op_139_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3112064)))];
+            tensor<fp16, [384]> var_140_to_fp16 = const()[name = string("op_140_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3407040)))];
+            tensor<fp16, [1, 1500, 384]> linear_3_cast_fp16 = linear(bias = var_140_to_fp16, weight = var_139_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<fp16, [1, 1500, 384]> x_13_cast_fp16 = add(x = var_55_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")];
+            tensor<int32, [1]> var_147_axes_0 = const()[name = string("op_147_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3407872)))];
+            tensor<fp16, [384]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3408704)))];
+            tensor<fp16, [1, 1500, 384]> var_147_cast_fp16 = layer_norm(axes = var_147_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_147_cast_fp16")];
+            tensor<fp16, [1536, 384]> var_156_to_fp16 = const()[name = string("op_156_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3409536)))];
+            tensor<fp16, [1536]> var_157_to_fp16 = const()[name = string("op_157_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4589248)))];
+            tensor<fp16, [1, 1500, 1536]> linear_4_cast_fp16 = linear(bias = var_157_to_fp16, weight = var_156_to_fp16, x = var_147_cast_fp16)[name = string("linear_4_cast_fp16")];
+            string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 1536]> x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")];
+            tensor<fp16, [384, 1536]> var_162_to_fp16 = const()[name = string("op_162_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4592384)))];
+            tensor<fp16, [384]> var_163_to_fp16 = const()[name = string("op_163_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5772096)))];
+            tensor<fp16, [1, 1500, 384]> linear_5_cast_fp16 = linear(bias = var_163_to_fp16, weight = var_162_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")];
+            tensor<fp16, [1, 1500, 384]> x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")];
+            int32 var_172 = const()[name = string("op_172"), val = int32(-1)];
+            tensor<int32, [1]> var_188_axes_0 = const()[name = string("op_188_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5772928)))];
+            tensor<fp16, [384]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5773760)))];
+            fp16 var_178_to_fp16 = const()[name = string("op_178_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 384]> var_188_cast_fp16 = layer_norm(axes = var_188_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_178_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_188_cast_fp16")];
+            tensor<fp16, [384, 384]> var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5774592)))];
+            tensor<fp16, [384]> var_200_to_fp16 = const()[name = string("op_200_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6069568)))];
+            tensor<fp16, [1, 1500, 384]> linear_6_cast_fp16 = linear(bias = var_200_to_fp16, weight = var_199_to_fp16, x = var_188_cast_fp16)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [384, 384]> var_203_to_fp16 = const()[name = string("op_203_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6070400)))];
+            tensor<fp16, [1, 1500, 384]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_203_to_fp16, x = var_188_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<fp16, [384, 384]> var_207_to_fp16 = const()[name = string("op_207_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6365376)))];
+            tensor<fp16, [384]> var_208_to_fp16 = const()[name = string("op_208_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6660352)))];
+            tensor<fp16, [1, 1500, 384]> linear_8_cast_fp16 = linear(bias = var_208_to_fp16, weight = var_207_to_fp16, x = var_188_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_216 = const()[name = string("op_216"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_217_cast_fp16 = reshape(shape = var_216, x = linear_6_cast_fp16)[name = string("op_217_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> q_7_cast_fp16 = mul(x = var_217_cast_fp16, y = const_30_to_fp16)[name = string("q_7_cast_fp16")];
+            tensor<int32, [4]> var_223 = const()[name = string("op_223"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_224_cast_fp16 = reshape(shape = var_223, x = linear_7_cast_fp16)[name = string("op_224_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> k_7_cast_fp16 = mul(x = var_224_cast_fp16, y = const_31_to_fp16)[name = string("k_7_cast_fp16")];
+            tensor<int32, [4]> var_230 = const()[name = string("op_230"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_231_cast_fp16 = reshape(shape = var_230, x = linear_8_cast_fp16)[name = string("op_231_cast_fp16")];
+            tensor<int32, [4]> var_232 = const()[name = string("op_232"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)];
+            bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, 1500]> transpose_19 = transpose(perm = transpose_19_perm_0, x = k_7_cast_fp16)[name = string("transpose_33")];
+            tensor<fp16, [1, 6, 1500, 64]> transpose_18 = transpose(perm = transpose_18_perm_0, x = q_7_cast_fp16)[name = string("transpose_34")];
+            tensor<fp16, [1, 6, 1500, 1500]> qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_18, y = transpose_19)[name = string("qk_3_cast_fp16")];
+            tensor<fp16, [1, 6, 1500, 1500]> var_236_cast_fp16 = softmax(axis = var_172, x = qk_3_cast_fp16)[name = string("op_236_cast_fp16")];
+            bool var_238_transpose_x_0 = const()[name = string("op_238_transpose_x_0"), val = bool(false)];
+            bool var_238_transpose_y_0 = const()[name = string("op_238_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, 1500, 64]> v_7_cast_fp16 = transpose(perm = var_232, x = var_231_cast_fp16)[name = string("transpose_35")];
+            tensor<fp16, [1, 6, 1500, 64]> var_238_cast_fp16 = matmul(transpose_x = var_238_transpose_x_0, transpose_y = var_238_transpose_y_0, x = var_236_cast_fp16, y = v_7_cast_fp16)[name = string("op_238_cast_fp16")];
+            tensor<int32, [4]> var_239 = const()[name = string("op_239"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_1 = const()[name = string("concat_1"), val = tensor<int32, [3]>([1, 1500, 384])];
+            tensor<fp16, [1, 1500, 6, 64]> var_240_cast_fp16 = transpose(perm = var_239, x = var_238_cast_fp16)[name = string("transpose_32")];
+            tensor<fp16, [1, 1500, 384]> x_23_cast_fp16 = reshape(shape = concat_1, x = var_240_cast_fp16)[name = string("x_23_cast_fp16")];
+            tensor<fp16, [384, 384]> var_244_to_fp16 = const()[name = string("op_244_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6661184)))];
+            tensor<fp16, [384]> var_245_to_fp16 = const()[name = string("op_245_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6956160)))];
+            tensor<fp16, [1, 1500, 384]> linear_9_cast_fp16 = linear(bias = var_245_to_fp16, weight = var_244_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<fp16, [1, 1500, 384]> x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")];
+            tensor<int32, [1]> var_252_axes_0 = const()[name = string("op_252_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6956992)))];
+            tensor<fp16, [384]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6957824)))];
+            tensor<fp16, [1, 1500, 384]> var_252_cast_fp16 = layer_norm(axes = var_252_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_178_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_252_cast_fp16")];
+            tensor<fp16, [1536, 384]> var_261_to_fp16 = const()[name = string("op_261_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6958656)))];
+            tensor<fp16, [1536]> var_262_to_fp16 = const()[name = string("op_262_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8138368)))];
+            tensor<fp16, [1, 1500, 1536]> linear_10_cast_fp16 = linear(bias = var_262_to_fp16, weight = var_261_to_fp16, x = var_252_cast_fp16)[name = string("linear_10_cast_fp16")];
+            string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 1536]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")];
+            tensor<fp16, [384, 1536]> var_267_to_fp16 = const()[name = string("op_267_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8141504)))];
+            tensor<fp16, [384]> var_268_to_fp16 = const()[name = string("op_268_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9321216)))];
+            tensor<fp16, [1, 1500, 384]> linear_11_cast_fp16 = linear(bias = var_268_to_fp16, weight = var_267_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")];
+            tensor<fp16, [1, 1500, 384]> x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")];
+            int32 var_277 = const()[name = string("op_277"), val = int32(-1)];
+            tensor<int32, [1]> var_293_axes_0 = const()[name = string("op_293_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9322048)))];
+            tensor<fp16, [384]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9322880)))];
+            fp16 var_283_to_fp16 = const()[name = string("op_283_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 384]> var_293_cast_fp16 = layer_norm(axes = var_293_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_293_cast_fp16")];
+            tensor<fp16, [384, 384]> var_304_to_fp16 = const()[name = string("op_304_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9323712)))];
+            tensor<fp16, [384]> var_305_to_fp16 = const()[name = string("op_305_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9618688)))];
+            tensor<fp16, [1, 1500, 384]> linear_12_cast_fp16 = linear(bias = var_305_to_fp16, weight = var_304_to_fp16, x = var_293_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [384, 384]> var_308_to_fp16 = const()[name = string("op_308_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9619520)))];
+            tensor<fp16, [1, 1500, 384]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_308_to_fp16, x = var_293_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<fp16, [384, 384]> var_312_to_fp16 = const()[name = string("op_312_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9914496)))];
+            tensor<fp16, [384]> var_313_to_fp16 = const()[name = string("op_313_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10209472)))];
+            tensor<fp16, [1, 1500, 384]> linear_14_cast_fp16 = linear(bias = var_313_to_fp16, weight = var_312_to_fp16, x = var_293_cast_fp16)[name = string("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_321 = const()[name = string("op_321"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_322_cast_fp16 = reshape(shape = var_321, x = linear_12_cast_fp16)[name = string("op_322_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> q_11_cast_fp16 = mul(x = var_322_cast_fp16, y = const_32_to_fp16)[name = string("q_11_cast_fp16")];
+            tensor<int32, [4]> var_328 = const()[name = string("op_328"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_329_cast_fp16 = reshape(shape = var_328, x = linear_13_cast_fp16)[name = string("op_329_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> k_11_cast_fp16 = mul(x = var_329_cast_fp16, y = const_33_to_fp16)[name = string("k_11_cast_fp16")];
+            tensor<int32, [4]> var_335 = const()[name = string("op_335"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_336_cast_fp16 = reshape(shape = var_335, x = linear_14_cast_fp16)[name = string("op_336_cast_fp16")];
+            tensor<int32, [4]> var_337 = const()[name = string("op_337"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
+            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, 1500]> transpose_21 = transpose(perm = transpose_21_perm_0, x = k_11_cast_fp16)[name = string("transpose_29")];
+            tensor<fp16, [1, 6, 1500, 64]> transpose_20 = transpose(perm = transpose_20_perm_0, x = q_11_cast_fp16)[name = string("transpose_30")];
+            tensor<fp16, [1, 6, 1500, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_20, y = transpose_21)[name = string("qk_5_cast_fp16")];
+            tensor<fp16, [1, 6, 1500, 1500]> var_341_cast_fp16 = softmax(axis = var_277, x = qk_5_cast_fp16)[name = string("op_341_cast_fp16")];
+            bool var_343_transpose_x_0 = const()[name = string("op_343_transpose_x_0"), val = bool(false)];
+            bool var_343_transpose_y_0 = const()[name = string("op_343_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, 1500, 64]> v_11_cast_fp16 = transpose(perm = var_337, x = var_336_cast_fp16)[name = string("transpose_31")];
+            tensor<fp16, [1, 6, 1500, 64]> var_343_cast_fp16 = matmul(transpose_x = var_343_transpose_x_0, transpose_y = var_343_transpose_y_0, x = var_341_cast_fp16, y = v_11_cast_fp16)[name = string("op_343_cast_fp16")];
+            tensor<int32, [4]> var_344 = const()[name = string("op_344"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_2 = const()[name = string("concat_2"), val = tensor<int32, [3]>([1, 1500, 384])];
+            tensor<fp16, [1, 1500, 6, 64]> var_345_cast_fp16 = transpose(perm = var_344, x = var_343_cast_fp16)[name = string("transpose_28")];
+            tensor<fp16, [1, 1500, 384]> x_35_cast_fp16 = reshape(shape = concat_2, x = var_345_cast_fp16)[name = string("x_35_cast_fp16")];
+            tensor<fp16, [384, 384]> var_349_to_fp16 = const()[name = string("op_349_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10210304)))];
+            tensor<fp16, [384]> var_350_to_fp16 = const()[name = string("op_350_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10505280)))];
+            tensor<fp16, [1, 1500, 384]> linear_15_cast_fp16 = linear(bias = var_350_to_fp16, weight = var_349_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<fp16, [1, 1500, 384]> x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")];
+            tensor<int32, [1]> var_357_axes_0 = const()[name = string("op_357_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10506112)))];
+            tensor<fp16, [384]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10506944)))];
+            tensor<fp16, [1, 1500, 384]> var_357_cast_fp16 = layer_norm(axes = var_357_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_357_cast_fp16")];
+            tensor<fp16, [1536, 384]> var_366_to_fp16 = const()[name = string("op_366_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10507776)))];
+            tensor<fp16, [1536]> var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11687488)))];
+            tensor<fp16, [1, 1500, 1536]> linear_16_cast_fp16 = linear(bias = var_367_to_fp16, weight = var_366_to_fp16, x = var_357_cast_fp16)[name = string("linear_16_cast_fp16")];
+            string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 1536]> x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")];
+            tensor<fp16, [384, 1536]> var_372_to_fp16 = const()[name = string("op_372_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11690624)))];
+            tensor<fp16, [384]> var_373_to_fp16 = const()[name = string("op_373_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12870336)))];
+            tensor<fp16, [1, 1500, 384]> linear_17_cast_fp16 = linear(bias = var_373_to_fp16, weight = var_372_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")];
+            tensor<fp16, [1, 1500, 384]> x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")];
+            int32 var_382 = const()[name = string("op_382"), val = int32(-1)];
+            tensor<int32, [1]> var_398_axes_0 = const()[name = string("op_398_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12871168)))];
+            tensor<fp16, [384]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12872000)))];
+            fp16 var_388_to_fp16 = const()[name = string("op_388_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 384]> var_398_cast_fp16 = layer_norm(axes = var_398_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_388_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_398_cast_fp16")];
+            tensor<fp16, [384, 384]> var_409_to_fp16 = const()[name = string("op_409_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12872832)))];
+            tensor<fp16, [384]> var_410_to_fp16 = const()[name = string("op_410_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13167808)))];
+            tensor<fp16, [1, 1500, 384]> linear_18_cast_fp16 = linear(bias = var_410_to_fp16, weight = var_409_to_fp16, x = var_398_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [384, 384]> var_413_to_fp16 = const()[name = string("op_413_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13168640)))];
+            tensor<fp16, [1, 1500, 384]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_413_to_fp16, x = var_398_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<fp16, [384, 384]> var_417_to_fp16 = const()[name = string("op_417_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13463616)))];
+            tensor<fp16, [384]> var_418_to_fp16 = const()[name = string("op_418_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13758592)))];
+            tensor<fp16, [1, 1500, 384]> linear_20_cast_fp16 = linear(bias = var_418_to_fp16, weight = var_417_to_fp16, x = var_398_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_426 = const()[name = string("op_426"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_427_cast_fp16 = reshape(shape = var_426, x = linear_18_cast_fp16)[name = string("op_427_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> q_cast_fp16 = mul(x = var_427_cast_fp16, y = const_34_to_fp16)[name = string("q_cast_fp16")];
+            tensor<int32, [4]> var_433 = const()[name = string("op_433"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_434_cast_fp16 = reshape(shape = var_433, x = linear_19_cast_fp16)[name = string("op_434_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 1]> const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
+            tensor<fp16, [1, 1500, 6, 64]> k_cast_fp16 = mul(x = var_434_cast_fp16, y = const_35_to_fp16)[name = string("k_cast_fp16")];
+            tensor<int32, [4]> var_440 = const()[name = string("op_440"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
+            tensor<fp16, [1, 1500, 6, 64]> var_441_cast_fp16 = reshape(shape = var_440, x = linear_20_cast_fp16)[name = string("op_441_cast_fp16")];
+            tensor<int32, [4]> var_442 = const()[name = string("op_442"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
+            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
+            tensor<int32, [4]> transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 6, 64, 1500]> transpose_23 = transpose(perm = transpose_23_perm_0, x = k_cast_fp16)[name = string("transpose_25")];
+            tensor<fp16, [1, 6, 1500, 64]> transpose_22 = transpose(perm = transpose_22_perm_0, x = q_cast_fp16)[name = string("transpose_26")];
+            tensor<fp16, [1, 6, 1500, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_22, y = transpose_23)[name = string("qk_cast_fp16")];
+            tensor<fp16, [1, 6, 1500, 1500]> var_446_cast_fp16 = softmax(axis = var_382, x = qk_cast_fp16)[name = string("op_446_cast_fp16")];
+            bool var_448_transpose_x_0 = const()[name = string("op_448_transpose_x_0"), val = bool(false)];
+            bool var_448_transpose_y_0 = const()[name = string("op_448_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 6, 1500, 64]> v_cast_fp16 = transpose(perm = var_442, x = var_441_cast_fp16)[name = string("transpose_27")];
+            tensor<fp16, [1, 6, 1500, 64]> var_448_cast_fp16 = matmul(transpose_x = var_448_transpose_x_0, transpose_y = var_448_transpose_y_0, x = var_446_cast_fp16, y = v_cast_fp16)[name = string("op_448_cast_fp16")];
+            tensor<int32, [4]> var_449 = const()[name = string("op_449"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> concat_3 = const()[name = string("concat_3"), val = tensor<int32, [3]>([1, 1500, 384])];
+            tensor<fp16, [1, 1500, 6, 64]> var_450_cast_fp16 = transpose(perm = var_449, x = var_448_cast_fp16)[name = string("transpose_24")];
+            tensor<fp16, [1, 1500, 384]> x_47_cast_fp16 = reshape(shape = concat_3, x = var_450_cast_fp16)[name = string("x_47_cast_fp16")];
+            tensor<fp16, [384, 384]> var_454_to_fp16 = const()[name = string("op_454_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13759424)))];
+            tensor<fp16, [384]> var_455_to_fp16 = const()[name = string("op_455_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14054400)))];
+            tensor<fp16, [1, 1500, 384]> linear_21_cast_fp16 = linear(bias = var_455_to_fp16, weight = var_454_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<fp16, [1, 1500, 384]> x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")];
+            tensor<int32, [1]> var_462_axes_0 = const()[name = string("op_462_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14055232)))];
+            tensor<fp16, [384]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14056064)))];
+            tensor<fp16, [1, 1500, 384]> var_462_cast_fp16 = layer_norm(axes = var_462_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_388_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_462_cast_fp16")];
+            tensor<fp16, [1536, 384]> var_471_to_fp16 = const()[name = string("op_471_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14056896)))];
+            tensor<fp16, [1536]> var_472_to_fp16 = const()[name = string("op_472_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15236608)))];
+            tensor<fp16, [1, 1500, 1536]> linear_22_cast_fp16 = linear(bias = var_472_to_fp16, weight = var_471_to_fp16, x = var_462_cast_fp16)[name = string("linear_22_cast_fp16")];
+            string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1500, 1536]> x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")];
+            tensor<fp16, [384, 1536]> var_477_to_fp16 = const()[name = string("op_477_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15239744)))];
+            tensor<fp16, [384]> var_478_to_fp16 = const()[name = string("op_478_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16419456)))];
+            tensor<fp16, [1, 1500, 384]> linear_23_cast_fp16 = linear(bias = var_478_to_fp16, weight = var_477_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 384]> x_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_cast_fp16")];
+            tensor<int32, [1]> var_491_axes_0 = const()[name = string("op_491_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [384]> ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16420288)))];
+            tensor<fp16, [384]> ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16421120)))];
+            fp16 var_482_to_fp16 = const()[name = string("op_482_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1500, 384]> output = layer_norm(axes = var_491_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_482_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_491_cast_fp16")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/tiny/encoder.mlmodelc/weights/weight.bin b/tiny/encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2e98f9000570b03c2ece1fcc966a3cd12fcfab05
--- /dev/null
+++ b/tiny/encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4efa9bb81afaf12ac6d7cf7a3a4ba1e6b92f05f96ae77fd55cf725e2ecd3a5fd
+size 16421952
diff --git a/tiny/model_dims.json b/tiny/model_dims.json
new file mode 100644
index 0000000000000000000000000000000000000000..811192d21c045ca7d97eb76fe25e783bea0802f0
--- /dev/null
+++ b/tiny/model_dims.json
@@ -0,0 +1,12 @@
+{
+  "n_mels": 80,
+  "n_audio_ctx": 1500,
+  "n_audio_state": 384,
+  "n_audio_head": 6,
+  "n_audio_layer": 4,
+  "n_vocab": 51865,
+  "n_text_ctx": 448,
+  "n_text_state": 384,
+  "n_text_head": 6,
+  "n_text_layer": 4
+}
\ No newline at end of file
diff --git a/whisper_convert.py b/whisper_convert.py
new file mode 100755
index 0000000000000000000000000000000000000000..0a3bc6498bd33c9a00cca4167f10823b1043f3bc
--- /dev/null
+++ b/whisper_convert.py
@@ -0,0 +1,434 @@
+#!/usr/bin/env python3
+
+from whisper import whisper
+
+import torch
+from torch import Tensor, nn
+import torch.nn.functional as F
+from typing import Optional, Iterable
+from dataclasses import dataclass
+import json
+
+@dataclass
+class ModelDimensions:
+    """Size hyper-parameters of a Whisper checkpoint.
+
+    Within this file the class is only used as the annotation of the
+    ``hparams`` argument of ``test_model``.
+    NOTE(review): the per-field remarks marked "presumably" are inferred from
+    the field names only -- confirm against the upstream whisper package.
+    """
+    n_mels: int  # middle axis of the (1, n_mels, 3000) log-mel input built in test_model
+    n_audio_ctx: int  # presumably the number of encoder output positions -- TODO confirm
+    n_audio_state: int  # presumably the encoder feature width -- TODO confirm
+    n_audio_head: int  # presumably the encoder attention head count -- TODO confirm
+    n_audio_layer: int  # presumably the encoder block count -- TODO confirm
+    n_vocab: int  # exclusive upper bound of the random token ids drawn in test_model
+    n_text_ctx: int  # test_model keeps decoding while past_kv_len + 1 is below this value
+    n_text_state: int  # presumably the decoder feature width -- TODO confirm
+    n_text_head: int  # presumably the decoder attention head count -- TODO confirm
+    n_text_layer: int  # presumably the decoder block count -- TODO confirm
+
+class LayerNorm(nn.LayerNorm):
+    def forward(self, x: Tensor) -> Tensor:
+        return super().forward(x.float()).type(x.dtype)
+
+
+class Linear(nn.Linear):
+    def forward(self, x: Tensor) -> Tensor:
+        return F.linear(
+            x,
+            self.weight.to(x.dtype),
+            None if self.bias is None else self.bias.to(x.dtype),
+        )
+
+class Conv1d(nn.Conv1d):
+    def _conv_forward(
+        self, x: Tensor, weight: Tensor, bias: Optional[Tensor]
+    ) -> Tensor:
+        return super()._conv_forward(
+            x, weight.to(x.dtype), None if bias is None else bias.to(x.dtype)
+        )
+
+class MultiHeadAttention(nn.Module):
+    def __init__(self, n_state: int, n_head: int, no_cross: bool = False, cross_only: bool = False):
+        super().__init__()
+        self.no_cross = no_cross
+        self.cross_only = cross_only
+        self.n_head = n_head
+        if not cross_only:
+            self.query = Linear(n_state, n_state)
+            self.out = Linear(n_state, n_state)
+        if not no_cross:
+            self.key = Linear(n_state, n_state, bias=False)
+            self.value = Linear(n_state, n_state)
+
+    def forward(
+        self,
+        x: Tensor,
+        mask: Optional[Tensor] = None,
+        k_cache: Optional[Tensor] = None,
+        v_cache: Optional[Tensor] = None,
+        offset: Optional[int] = None,
+    ):
+        if self.cross_only:
+            k = self.key(x)
+            v = self.value(x)
+            k_len = k.shape[-2]
+            k_cache[:,:k_len,:] = k
+            v_len = v.shape[-2]
+            v_cache[:,:v_len,:] = v
+            return x
+
+        q = self.query(x)
+
+        if self.no_cross:
+            k = torch.zeros_like(k_cache)
+            k_len = k.shape[-2]
+            k[:,:k_len,:] = k_cache
+            v = torch.zeros_like(v_cache)
+            v_len = v.shape[-2]
+            v[:,:k_len,:] = v_cache
+
+        else:
+            k = self.key(x)
+            v = self.value(x)
+
+            q_len = q.shape[-2]
+            end_step = offset + q_len
+
+            k_cache[:, offset:end_step, :] = k
+            v_cache[:, offset:end_step, :] = v
+
+            k = k_cache[:, :end_step, :]
+            v = v_cache[:, :end_step, :]
+
+        wv = self.qkv_attention(q, k, v, mask)
+        return self.out(wv)
+
+    def qkv_attention(
+        self, q: Tensor, k: Tensor, v: Tensor, mask: Optional[Tensor] = None
+    ):
+        n_batch, n_ctx, n_state = q.shape
+        scale = (n_state // self.n_head) ** -0.25
+        q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3) * scale
+        k = k.view(*k.shape[:2], self.n_head, -1).permute(0, 2, 3, 1) * scale
+        v = v.view(*v.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
+
+        qk = q @ k
+        if mask is not None:
+            qk = qk + mask[:n_ctx, :n_ctx]
+        qk = qk.float()
+
+        w = F.softmax(qk, dim=-1).to(q.dtype)
+        return (w @ v).permute(0, 2, 1, 3).flatten(start_dim=2)
+
+class ResidualAttentionBlock(nn.Module):
+    def __init__(self, n_state: int, n_head: int, cross_attention: bool = False, cross_only: bool = False):
+        super().__init__()
+        self.cross_only = cross_only
+        if cross_only:
+            self.cross_attn = (
+                MultiHeadAttention(n_state, n_head, cross_only=True)
+            )
+        else:
+            self.attn = MultiHeadAttention(n_state, n_head)
+            self.attn_ln = LayerNorm(n_state)
+
+            self.cross_attn = (
+                MultiHeadAttention(n_state, n_head, no_cross=True) if cross_attention else None
+            )
+            self.cross_attn_ln = LayerNorm(n_state) if cross_attention else None
+
+            n_mlp = n_state * 4
+            self.mlp = nn.Sequential(
+                Linear(n_state, n_mlp), nn.GELU(), Linear(n_mlp, n_state)
+            )
+            self.mlp_ln = LayerNorm(n_state)
+
+    def forward(
+        self,
+        x: Tensor,
+        offset: Optional[int] = None,
+        mask: Optional[Tensor] = None,
+        k_cache1: Optional[Tensor] = None,
+        v_cache1: Optional[Tensor] = None,
+        k_cache2: Optional[Tensor] = None,
+        v_cache2: Optional[Tensor] = None,
+    ):
+        if self.cross_only:
+            x = self.cross_attn(x, k_cache=k_cache2, v_cache=v_cache2)
+        else:
+            x = x + self.attn(self.attn_ln(x), mask=mask, k_cache=k_cache1, v_cache=v_cache1, offset=offset)
+            if self.cross_attn:
+                x = x + self.cross_attn(self.cross_attn_ln(x), k_cache=k_cache2, v_cache=v_cache2)
+            x = x + self.mlp(self.mlp_ln(x))
+        return x
+
+class TextDecoder_first(nn.Module):
+    def __init__(
+        self, n_batch: int, n_vocab: int, n_text_ctx: int, n_audio_ctx: int, n_state: int, n_head: int, n_layer: int
+    ):
+        super().__init__()
+
+        self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList(
+            [
+                ResidualAttentionBlock(n_state, n_head, cross_attention=True, cross_only=True)
+                for _ in range(n_layer)
+            ]
+        )
+
+        self.kvcache_shape1 = (n_layer, n_batch, n_text_ctx, n_state)
+        self.kvcache_shape2 = (n_layer, n_batch, n_audio_ctx, n_state)
+        self.register_buffer("k_cache1", torch.zeros(self.kvcache_shape1))
+        self.register_buffer("v_cache1", torch.zeros(self.kvcache_shape1))
+        self.register_buffer("k_cache2", torch.zeros(self.kvcache_shape2))
+        self.register_buffer("v_cache2", torch.zeros(self.kvcache_shape2))
+
+    def forward(self, xa: Tensor):
+        """
+        xa : torch.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state)
+            the encoded audio features to be attended on
+        """
+        self.k_cache1[:,:,:,:] = 0
+        self.v_cache1[:,:,:,:] = 0
+        x = xa
+        for i, block in enumerate(self.blocks):
+            x = block(x, k_cache2=self.k_cache2[i], v_cache2=self.v_cache2[i])
+
+        return x
+
+
+class TextDecoder_second(nn.Module):
+    def __init__(
+        self, n_batch: int, n_vocab: int, n_text_ctx: int, n_audio_ctx: int, n_state: int, n_head: int, n_layer: int
+    ):
+        super().__init__()
+
+        self.token_embedding = nn.Embedding(n_vocab, n_state)
+        self.positional_embedding = nn.Parameter(torch.empty(n_text_ctx, n_state))
+
+        self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList(
+            [
+                ResidualAttentionBlock(n_state, n_head, cross_attention=True)
+                for _ in range(n_layer)
+            ]
+        )
+        self.ln = LayerNorm(n_state)
+
+        mask = torch.empty(n_text_ctx, n_text_ctx).fill_(-np.inf).triu_(1)
+        self.register_buffer("mask", mask, persistent=False)
+
+        self.kvcache_shape1 = (n_layer, n_batch, n_text_ctx, n_state)
+        self.kvcache_shape2 = (n_layer, n_batch, n_audio_ctx, n_state)
+        self.register_buffer("k_cache1", torch.zeros(self.kvcache_shape1))
+        self.register_buffer("v_cache1", torch.zeros(self.kvcache_shape1))
+        self.register_buffer("k_cache2", torch.zeros(self.kvcache_shape2))
+        self.register_buffer("v_cache2", torch.zeros(self.kvcache_shape2))
+
+    def forward(self, x: Tensor, offset_mask: Tensor):
+        """
+        x : torch.LongTensor, shape = (batch_size, <= n_ctx)
+            the text tokens
+        xa : torch.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state)
+            the encoded audio features to be attended on
+        """
+        end_step = offset_mask.shape[-1]
+        offset = end_step - x.shape[-1]
+        x = (
+            self.token_embedding(x)
+            + self.positional_embedding[offset:end_step]
+        )
+
+        for i, block in enumerate(self.blocks):
+            x = block(x, offset=offset, mask=self.mask, k_cache1=self.k_cache1[i], v_cache1=self.v_cache1[i], k_cache2=self.k_cache2[i], v_cache2=self.v_cache2[i])
+
+        x = self.ln(x)
+        logits = (
+            x @ torch.transpose(self.token_embedding.weight.to(x.dtype), 0, 1)
+        ).float()
+
+        return logits
+
+import numpy as np
+import coremltools as ct
+
+def converter_encoder(model: whisper.Whisper, split: bool = False):
+    model.eval()
+    encoder = model.encoder
+    hparams = model.dims
+
+    input_shape = (1, hparams.n_mels, 3000)
+    input_data = torch.randn(input_shape)
+    traced_model = torch.jit.trace(encoder, input_data)
+
+    coreml_model = ct.convert(
+        traced_model,
+        inputs=[ct.TensorType(name="logmel_data", shape=input_shape)],
+        outputs=[ct.TensorType(name="output")],
+        minimum_deployment_target=ct.target.iOS18,
+    )
+    coreml_model.save("encoder.mlpackage")
+
+    if split:
+        ct.models.utils.bisect_model(
+            coreml_model,
+            "./encoder/",
+            merge_chunks_to_pipeline=True,
+        )
+    del coreml_model
+
+def converter_decoder(model: whisper.Whisper):
+    model.eval()
+    org_decoder = model.decoder
+    hparams = model.dims
+
+    batch_size = 1
+    decoder1 = TextDecoder_first(
+                batch_size,
+                hparams.n_vocab,
+                hparams.n_text_ctx,
+                hparams.n_audio_ctx,
+                hparams.n_text_state,
+                hparams.n_text_head,
+                hparams.n_text_layer,
+            )
+
+    decoder1.load_state_dict(org_decoder.state_dict(), strict=False)
+    decoder1.eval()
+
+    tokens_shape = (batch_size, 1)
+    audio_shape = (batch_size, hparams.n_audio_ctx, hparams.n_audio_state)
+
+    audio_data = torch.randn(audio_shape)
+    traced_model1 = torch.jit.trace(decoder1, [audio_data])
+
+    audio_length = ct.RangeDim(lower_bound=1, upper_bound=hparams.n_audio_ctx, default=1)
+    inputs = [
+        ct.TensorType(shape=(batch_size, audio_length, hparams.n_audio_state), dtype=np.float16, name="audio_data"),
+    ]
+    outputs = [ct.TensorType(dtype=np.float16, name="dummy")]
+    states = [
+        ct.StateType(
+            wrapped_type=ct.TensorType(
+                shape=decoder1.kvcache_shape1, dtype=np.float16
+            ),
+            name="k_cache1",
+        ),
+        ct.StateType(
+            wrapped_type=ct.TensorType(
+                shape=decoder1.kvcache_shape1, dtype=np.float16
+            ),
+            name="v_cache1",
+        ),
+        ct.StateType(
+            wrapped_type=ct.TensorType(
+                shape=decoder1.kvcache_shape2, dtype=np.float16
+            ),
+            name="k_cache2",
+        ),
+        ct.StateType(
+            wrapped_type=ct.TensorType(
+                shape=decoder1.kvcache_shape2, dtype=np.float16
+            ),
+            name="v_cache2",
+        ),
+    ]
+
+    converted_model = ct.convert(
+        traced_model1,
+        inputs=inputs,
+        outputs=outputs,
+        states=states,
+        minimum_deployment_target=ct.target.iOS18,
+    )
+    converted_model.save("decoder_first.mlpackage")
+    del traced_model1
+    del converted_model
+
+    decoder2 = TextDecoder_second(
+                batch_size,
+                hparams.n_vocab,
+                hparams.n_text_ctx,
+                hparams.n_audio_ctx,
+                hparams.n_text_state,
+                hparams.n_text_head,
+                hparams.n_text_layer,
+            )
+
+    decoder2.load_state_dict(org_decoder.state_dict(), strict=False)
+    decoder2.eval()
+
+    token_data = torch.randint(hparams.n_vocab, tokens_shape).long()
+    offset_mask = torch.zeros(tokens_shape)
+    traced_model2 = torch.jit.trace(decoder2, [token_data, offset_mask])
+
+    query_length = ct.RangeDim(lower_bound=1, upper_bound=hparams.n_text_ctx, default=1)
+    end_step_dim = ct.RangeDim(lower_bound=1, upper_bound=hparams.n_text_ctx, default=1)
+    inputs = [
+        ct.TensorType(shape=(batch_size, query_length), dtype=np.int32, name="token_data"),
+        ct.TensorType(shape=(batch_size, end_step_dim), dtype=np.float16, name="offset_mask"),
+    ]
+    outputs = [ct.TensorType(dtype=np.float16, name="logits")]
+
+    converted_model = ct.convert(
+        traced_model2,
+        inputs=inputs,
+        outputs=outputs,
+        states=states,
+        minimum_deployment_target=ct.target.iOS18,
+    )
+    converted_model.save("decoder_second.mlpackage")
+    del traced_model2
+    del converted_model
+
+def test_model(hparams: ModelDimensions):
+    logmel_shape = (1, hparams.n_mels, 3000)
+
+    encoder = ct.models.MLModel("encoder.mlpackage")
+    encoder_output = encoder.predict({'logmel_data': np.random.rand(*logmel_shape)})
+    audio_data = encoder_output['output']
+
+    decoder1 = ct.models.MLModel("decoder_first.mlpackage")
+    decoder2 = ct.models.MLModel("decoder_second.mlpackage")
+    decoder_state = decoder1.make_state()
+    decoder_input = {
+        'audio_data': audio_data,
+    }
+    decoder_output = decoder1.predict(decoder_input, decoder_state)
+
+    past_kv_len = 0
+    token_data = np.random.randint(hparams.n_vocab, size=(1, 5), dtype=np.int32)
+    offset_mask = np.zeros((1, past_kv_len + 5))
+    decoder_input = {
+        'token_data': token_data,
+        'offset_mask': offset_mask,
+    }
+    decoder_output = decoder2.predict(decoder_input, decoder_state)
+    print(decoder_output)
+    past_kv_len += 5
+
+    while past_kv_len + 1 < hparams.n_text_ctx:
+        token_data = np.random.randint(hparams.n_vocab, size=(1, 1), dtype=np.int32)
+        offset_mask = np.zeros((1, past_kv_len + 1))
+        decoder_input = {
+            'token_data': token_data,
+            'offset_mask': offset_mask,
+        }
+        decoder_output = decoder2.predict(decoder_input, decoder_state)
+        print(decoder_output)
+        past_kv_len += 1
+
+def print_dims(model: whisper.Whisper):
+    with open('model_dims.json', 'w') as f:
+        json.dump(model.dims.__dict__, f, indent=2)
+
+if __name__=='__main__':
+    import os
+    os.makedirs("work", exist_ok=True)
+    os.chdir("work")
+    for model_size in ['tiny','base','small','medium','large-v2','large-v3']:
+        print(model_size)
+        os.makedirs(model_size, exist_ok=True)
+        os.chdir(model_size)
+        model = whisper.load_model(model_size)
+        print_dims(model)
+        converter_encoder(model, split=model_size.startswith('large'))
+        converter_decoder(model)
+        # test_model(model.dims)
+        del model
+        os.chdir("..")
+    os.chdir("..")